
* ---- session setup ----
clear all
set more off

* Folder globals, built from the lotto and raw globals (not set in this part of the file).
global charter "${lotto}"
global save "${raw}\saves\school by year files"
global dir "${lotto}\1_id matching"

* work from the save folder
cd "${save}"

* ---- section switches (newdata gates the import block that follows) ----
global newdata "1"    // new boston and state data
global bostondata "1" // original boston data
global match "1"      // final fuzzy matching attempt
global cleanup "1"    // final organization of data

if "${newdata}"=="1"{
* inputcharter -- import one formatted lottery file and save a harmonized copy.
*   path(string)  : file handed to insheet
*   short(string) : tag used as the stem of the saved files and as the name of a constant indicator variable
* Writes <short>_raw.dta right after import and <short>.dta after cleaning, both in the save folder.
* Most steps run under capture because the source files do not all carry the same columns:
* a step whose variable is absent, or of the wrong type, is silently skipped.
* Keep the statement order when editing: variable abbreviations and later renames depend on it.
cap prog drop inputcharter
prog define inputcharter 
	syntax , path(string) short(string)
	insheet using "`path'", names clear double
	di "Insheeted `short'"
	cd "$save"
	drop if school==""
	save `short'_raw.dta, replace

	* ---- columns that are not carried forward ----
	cap drop if obs==. /* get rid of the sumstats row and blank observations */
	foreach dropvar in obs gender raceethnicity lastname firstname middlename birthdate offerdate ///
		statasasid townofresidence siblingfirstname siblinglast testyear dateaddedtowaitlist ///
		statasasid2 fuzzysasid {
		cap drop `dropvar'
	}
	*cap drop caplast 
	*cap drop capfirst
	*EMS added 8-15-2013
	* sasid is taken from combinedsasid when that column exists, is numeric, and sasid is not already present
	cap g double sasid=combinedsasid
	foreach dropvar in combinedsasid combo_sasid siblingmiddle notes schoolid enrolllossofseatdate ///
		lotterynumber_state waitlistnumber_state {
		cap drop `dropvar'
	}
	*cap drop stschool ndschool rdschool
	* kept as one command: if any of the six is absent, capture skips the drop for all of them
	cap drop  initialofferroxp initialoffergh initialofferdp everofferroxp everoffergh everofferdp	

	*CW:  Fix messed up case in CoaH '09
	cap replace sasid="" if sasid=="*1096532213&1077005024"
	cap replace sasid="" if sasid=="x"
	cap destring sasid, replace

	* ---- disqualified becomes 0/1: 0 for a raw value of zero or empty/missing, 1 for anything else ----
	* two replaces because the raw column may be string or numeric; the mismatched one fails under capture
	gen dis=1
	cap replace dis=0 if disqualified=="0"|disqualified==""
	cap replace dis=0 if disqualified==0|disqualified==.
	drop disqualified
	ren dis disqualified

	* ---- force a common (string) type so the recodes below work on every file ----
	cap tostring  siblingstatus, replace
	cap tostring  siblingstatustype, replace
	cap tostring  outofarea, replace
	* not captured: the program stops here if lateapplicant is absent
	tostring lateapplicant, replace
	foreach strvar in disqualified offeredadmissioninitially offeredadmissionever lotterynum waitlistnum ///
		date cantmatch stschool ndschool rdschool dob {
		cap tostring `strvar', replace
	}
	cap replace dob="" if dob=="."
	foreach strvar in updatedinitialofferroxp updatedinitialoffergh updatedinitialofferdp ///
		updatedeverofferroxp updatedeveroffergh updatedeverofferdp prioritygroupnumber ///
		offerredadmission2ndlotteryever secondlottery {
		cap tostring `strvar', replace
	}
	foreach dropvar in v35 v36 v37 v38 v41 {
		cap drop `dropvar'
	}
	cap tostring dateoflottery, replace
	cap destring basedonschool, replace
	cap drop dateoffered
	foreach vardrop in lotterynumber_state offeroffwaitlist dateaccepted septemberlottery septembereveroffer ///
		campusassignment statsasid comments waitlistfile enrolled declined combined k2 k2lottery k2decline ///
		k2ineligible sheet13 k1 k1lottery k1decline k1ineligible status reconciliation cantmatch siblnglastname ///
		incodmanapps orderofdrawing admissionsstatus orange updates statamismatched {
		cap drop `vardrop'
	}	
	* (a bare "cap ren" with no arguments stood here; it could only fail, so it was removed)
	* NOTE(review): drop does not accept a variable list together with if, so under capture
	* the next line never drops anything. Left unchanged because making it work would alter the saved data.
	cap drop lottery if school=="COAHII" // COAH II 2013
	foreach dropvar in v39 v40 v9 v43 v29 idnumber accept* wl1_5 denied_removed other siblingnote ///
		offerpending rematched residency bc column* wlnumber lotteryres* appdate flag rematched ///
		abovebar wl {
		cap drop `dropvar'
	}
	* one command again: all five must exist for any of them to be dropped
	cap drop assignedroxp assignedgh assigneddp hascampusassignment nocampusassignment
	* repair a misspelled column header
	cap ren offeredadmissioninitianlly offeredadmissioninitially
	
	
	* ---- campus-specific offers (roxp, gh, dp) ----
	* NOTE(review): initial_offer and offer are only created by the renames near the end of this program,
	* and the six campus columns are dropped above when all are present, so these replaces look like
	* they rarely run -- confirm before relying on them.
	foreach v in roxp gh dp {
		cap destring initialoffer`v', replace
		cap destring everoffer`v', replace
		cap replace initial_offer = initialoffer`v' if (initialoffer`v' !=. & initialoffer`v' != 0)
		cap replace offer = everoffer`v' if (everoffer`v' !=. & everoffer`v' != 0)
	}
	
	*sibs
	* sibling = 1 for a flagged or enrolled sibling; siblingapplying = 1 for a sibling applying at the
	* same time, which also resets sibling to 0
	qui replace siblingstatus="1" if siblingstatus=="1"|siblingstatus=="x"|siblingstatus=="yes"|siblingstatus=="Yes"
	qui replace siblingstatus="0" if siblingstatus==""|siblingstatus=="no"|siblingstatus=="No"
	qui gen sibling=0
	qui replace sibling=1 if siblingstatustype=="enrolled"|siblingstatustype=="Enrolled"
	qui replace sibling=1 if siblingstatus=="1"
	qui replace sibling=1 if siblingstatustype==""&siblingstatus=="1"
	qui gen siblingapplying=0
	qui replace siblingapplying=1 if siblingstatustype=="applying"|siblingstatustype=="Applying" ///
		|siblingstatustype=="Applying at same time" |siblingstatustype=="Applying twins" ///
		|siblingstatustype=="Applying/Newly admitted" |siblingstatustype=="applied" |siblingstatustype=="Applied"
	qui replace sibling=0 if siblingapplying==1
	* skipped as a whole under capture when mismatched is absent or not a string
	cap replace disqualified ="1" if disqualified=="4th grade (not eligible for 6th)"|disqualified=="Retained in 5th grade (not eligible for 6th)" ///
		|disqualified=="no other data" | disqualified=="Withdrew application prior to lottery" ///
		|disqualified=="Out of area"|mismatched=="1"
	cap drop mismatched	

	* ---- yes/no style columns to numeric 0/1 ----
	* NOTE(review): the last comparison string in the first replace looks like a stray paste; kept as is
	local vars outofarea lateapplicant offeredadmissioninitially offeredadmissionever disqualified
	foreach v of local vars {
		cap replace `v'="0" if `v'==""|`v'=="No"|`v'=="no" | `v'=="$charter\bridgeboston\formatted\Spring"
		cap replace `v'="1" if `v'=="x"|`v'=="X"|`v'=="Yes"|`v'=="yes"
		cap destring `v', replace
		cap replace `v'=0 if `v'==.
		}
		
	cap ren offeredadmissionever offer
	cap ren offeredadmissioninitially initial_offer
	* back-fill from initialoffer when present; the test previously named a misspelled variable
	* (intialoffer), which made this statement fail under capture every time
	cap replace initial_offer = initialoffer if initial_offer==.&initialoffer!=.
	qui drop siblingstatus siblingstatustype
	ren applicationyear year
	ren gradeapplying grade
	
	* constant indicator named after the short tag
	gen `short'=1
		
	cap format sasid %12.0f
	save `short'.dta , replace
end

* Running position into shortnames: cleared here and advanced once per path in the import loop.
local shortindex

*CW 1 27 2012:  ADD NEW PATHS HERE
* pathnames holds one quoted file path per lottery file, all wrapped in compound double quotes so
* that foreach ... of global can walk the entries one at a time.
* The position of each path has to line up with the entry at the same position in shortnames.
* Forward slashes and backslashes are mixed; every entry is used exactly as written.

#delimit ;
global pathnames 
`"	
		"$charter\academy of the pacific rim\formatted\APR 2005 matched_FUZZY.csv" 
		"$charter\academy of the pacific rim\formatted\APR Spring 2006 matched_FUZZY.txt" 
		"$charter\academy of the pacific rim\formatted\APR Spring 2007 id matched 6th_FUZZY.txt" 
		"$charter\academy of the pacific rim\formatted\APR spring 2007 5th grade stata matched_FUZZY_with_fixedsasids.txt" 
		"$charter\academy of the pacific rim\formatted\APR spring 2008 5th grade stata matched_FUZZY.txt"		
		"$charter\academy of the pacific rim\formatted\APR Spring 2009 Lottery matched_FUZZY.txt"		
		"$charter\academy of the pacific rim\formatted\APR_2010_Matched_FUZZY.txt"	
		"$charter\academy of the pacific rim\formatted\Spring 2011\APR Formatted Matched RECODED.csv"
		"$charter\academy of the pacific rim\formatted\Spring 2012\APR 2012 MATCHED.csv"
		"$charter\academy of the pacific rim\formatted\Spring 2013\APR Sp2013 Formatted Matched FIXED.txt"
			
		"$charter\boston collegiate\formatted\Boston Collegiate Spring 2007 Lottery matched_FUZZY.txt"  
		"$charter\boston collegiate\formatted\Boston Collegiate Spring 2008 Lottery matched_FUZZY_with_fixedsasids.txt"  
		"$charter\boston collegiate\formatted\Boston Collegiate Spring 2009 Lottery matched_FUZZY_with_fixedsasids.txt"
		"$charter\boston collegiate\formatted\Boston_Collegiate_2010_Matched_FUZZY.txt"
		"$charter\boston collegiate\formatted\Spring 2011\Collegiate Formatted Matched.txt"	
		"$charter\boston collegiate\formatted\Spring2012\BosCol 2012 MATCHED.txt"
		"$charter\boston collegiate\formatted\Spring 2013\Boston Collegiate Sp2013 Formatted Matched Updated v7.txt"
		"$charter\boston collegiate\formatted\Spring 2014\BosCol Sp2014 Formatted Matched updated v3 Handmatching.txt"
		
		"$charter\boston green academy\matched\BGA 2011 July_matched_with priority group info.txt"
		"$charter\boston green academy\formatted\Spring2012\BGASpring2012_Matched.csv"
		"$charter\boston green academy\formatted\Spring2013\BGA Sp2013 Formatted Matched_with_fixedsasids.txt"
		"$charter\boston green academy\formatted\Spring2014\BGA Sp2014 formatted matched handmatching.txt"
		
		"$charter\boston prep\formatted\Boston Prep Spring 2008 Lottery - matched_FUZZY.txt"  
		"$charter\boston prep\formatted\Boston Prep Spring 2009 Lottery matchednew_FUZZY.txt"
		"$charter\boston prep\formatted\Boston_Prep_2010_Matched_FUZZY_with_fixedsasids.txt"
		"$charter\boston prep\formatted\Boston Prep 2011 Lottery Matched RECODED.csv"
		"$charter\boston prep\formatted\Spring 2012\Bos Prep 2012 formatted MATCHED.txt"
		"$charter\boston prep\formatted\Spring 2013\Boston Prep Sp2013 Formatted Matched.txt"
		"$charter\boston prep\formatted\Spring 2014\Boston Prep Spring 2014 Gr 6 Formatted Matched Handmatching.txt"
		"$charter\boston prep\formatted\Spring 2014\Boston Prep Spring 2014 Gr 9 Formatted Matched Handmatching.txt"

		"$charter\city on a hill\formatted\City on a Hill Spring 2007 Lottery - matched_FUZZY_with_fixedsasids.txt"  
		"$charter\city on a hill\formatted\City on a Hill Spring 2008 Lottery matched_FUZZY_with_fixedsasids.txt"		
		"$charter\city on a hill\formatted\COAH_2009_Matched_RECODED_FUZZY.txt"
		"$charter\city on a hill\formatted\Spring2010\CH Formatted 2010 Matched.csv"
		"$charter\city on a hill\formatted\Spring2011\CH 2011 Lottery Matched.txt"  
		"$charter\city on a hill\formatted\Spring2012\COAH Sp2012 Matched.txt"  
		"$charter\city on a hill\formatted\Spring2013 - CoaH I\CoaH I Sp2013 Formatted Matched fixed.txt"
		"$charter\city on a hill\formatted\Spring 2014 CoaH I\CoaH I Spring 2014 Gr 9 Formatted Matched.txt"
				
		"$charter\city on a hill\formatted\Spring2013 - CoaH II\CoaH II Sp2013 Formatted Matched.txt"
		"$charter\city on a hill\formatted\Spring 2014 CoaH II\CoaH II Spring 2014 Gr 9 Formatted Matched.txt"
	
		"$charter\codman\formatted\Codman Spring 2008 Lottery - matchednew_FUZZY.txt"
		"$charter\codman\formatted\Codman_2009_Matched_FUZZY.txt"	
		"$charter\codman\formatted\Codman Spring 2010 Lottery MATCHED RECODED.csv"
		"$charter\codman\formatted\Spring2011\Codman Formatted Spring 2011 RECODED MATCHED_with_fixedsasids.txt"
		"$charter\codman\formatted\Spring2012\Codman 2012 MATCHED.txt"
		"$charter\codman\formatted\Spring2013\Codman Sp2013 Formatted Matched.txt"
		"$charter\codman\formatted\Spring 2014\Codman Spring 2014 Grade 9 Formatted Matched Handmatching.txt"
		
		"$charter\codman\formatted\Spring 2014 ES\Codman Spring 2014 Gr 5 Formatted Matched Handmatching.txt"
		"$charter\codman\formatted\Spring 2014\Codman Spring 2014 Grade 6 Formatted Matched Handmatching.txt"
		
		
		"$charter\dorchester collegiate\formatted\Spring2012\DCSpring2012 Matched.txt"
				
		"$charter/edward brooke I_roslindale/formatted/Edward Brooke Spring 2007 Lottery - matched_FUZZY.txt"  
		"$charter/edward brooke I_roslindale/formatted/Edward Brooke Spring 2008 Lottery - matched_FUZZY.csv"  
		"$charter/edward brooke I_roslindale/formatted/Edward Brooke Spring 2009 Lottery - matched_FUZZY.txt"
	
		"$charter\edward brooke II_mattapan\formatted\Spring 2011\Brook2 2011 Matched_with_fixedsasids.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2012\Brooke2 2012 Formatted Matched.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2013\Brooke Mattapan Sp2013 Formatted Matched fixed_with_fixedsasids.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2014\Brooke2 2014 Gr 5 Formatted Matched fixed.txt"

		"$charter\edward brooke III_east boston\formatted\2012-2012\BEB Formatted Matched.txt"
		"$charter\edward brooke III_east boston\formatted\Spring 2013\BrookeIII_east boston Sp2013 Formatted Matched.txt"
		"$charter\edward brooke III_east boston\formatted\Spring 2014\Brooke3 2014 Gr 5 Formatted Matched fixed.txt"

		"$charter/excel academy I_east boston/formatted/Excel Spring 2008 matched_FUZZY.txt"  
		"$charter/excel academy I_east boston/formatted/Excel Spring 2009 5th Grade Lottery matched_FUZZY.txt"
		"$charter/excel academy I_east boston/formatted/Excel_2010_Matched_RECODED.txt"
		"$charter/excel academy I_east boston/formatted/Spring 2011/Excel I EB Sp2011 Matched.txt"
		"$charter\excel academy I_east boston\formatted\Spring 2012\Excel I East Boston 2012 Matched.txt"
		"$charter\excel academy I_east boston\formatted\Spring 2013\Excel Academy I (East Boston) Sp2013 Formatted Matched.txt"
		
		"$charter\excel academy III_orient heights\formatted\Spring 2012\Excel OH Sp2012 Matched.txt"
		"$charter\excel academy III_orient heights\formatted\Spring 2013\Excel Academy III (Orient Heights) Sp2013 Formatted Matched.txt"
		
		"$charter\grove hall prep\formatted\Spring2011\Grove Hall 2011 Formatted Matched_with_fixedsasids.txt"	

		"$charter\kipp boston\formatted\KIPP Boston Spring 2012 MATCHED.txt"
		"$charter\kipp boston\formatted\Spring 2013\KIPPAcademyBoston Sp2013 Formatted Matched.txt"
		"$charter\kipp boston\formatted\Spring 2014\KIPPAcademyBoston Spring 2014 Gr 5 Formatted Matched.txt"		
		
		"$charter\match hs and ms\formatted\MATCH HS Spring 2007 Lottery matched_FUZZY_with_fixedsasids.txt"  
		"$charter/match hs and ms/formatted/MATCH HS Spring 2008 9th Grade Lottery matched RECODED_FUZZY.txt"
		"$charter\match hs and ms\formatted\MATCH_HS 2009_Matched_RECODED_FUZZY.txt"	
		"$charter\match hs and ms\formatted\Spring2010_HS\MatchHS Spring2010 MATCHED.txt"
						
		"$charter/match hs and ms/formatted/MATCH MS Spring 2008 Lottery - matched_FUZZY_with_fixedsasids.txt" 
		"$charter/match hs and ms/formatted/MATCH MS Spring 2009 6th Grade Lottery matchednew_FUZZY_with_fixedsasids.txt"
		"$charter/match hs and ms/formatted/MATCH_MS_2010_Matched RECODED_FUZZY.txt"
		"$charter\match hs and ms\formatted\Spring2011_MS\MatchMS Spring2011 MATCHED.txt"
		"$charter\match hs and ms\formatted\Spring2012_MS\Match MS 2012 Matched.txt"
		"$charter\match hs and ms\formatted\Spring2013_MS\Match MS Sp2013 Formatted Matched.txt"
		"$charter\match hs and ms\formatted\Spring 2014 MS\Match MS Spring 2014 Gr 6 Formatted Matched.txt"
		
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2006 Lottery matched_FUZZY.txt"
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2007 Lottery matched_FUZZY.txt"
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2008 Lottery matched_FUZZY.txt"
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2009 Grade 6 Lottery matched RECODED_FUZZY.txt"
		"$charter/roxbury prep/formatted/Rox Prep Spring 2010 RECODED MATCHED_FUZZY.txt"
		"$charter\roxbury prep\formatted\Rox Prep Spring 2011 Matched_with_fixedsasids.txt"
		
		"$charter\uncommon schools aka roxprep\formatted\Spring2012\UncommonSchools2012 Matched updated.txt"
		"$charter\uncommon schools aka roxprep\formatted\Spring 2013\Uncommon Schools Sp2013 Formatted Matched UpdatedOffers_fewervarsforCSV.txt"
		"$charter\uncommon schools aka roxprep\formatted\Spring 2014\UCS Spring 2014 Gr 5 Formatted Matched Handmatching.txt"

		"$charter\up academy\matched\UP2011_Matched_FUZZY.txt"		
		"$charter\up academy\matched\UP2012_Matched_FUZZY.txt"
		"$charter\up academy\formatted\Spring2013\UP Academy Sp2013 Formatted Matched.txt"
		"$charter\up academy\formatted\Spring 2014\UP Boston Spring 2014 Gr6 Formatted Matched Handmatching.txt"

		
		"$charter\edward brooke I_roslindale\formatted\Spring 2007\Roslindale 2007 Gr K Formatted Matched_fixedsasids.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2008\Roslindale 2008 Gr K Formatted Matched Fixed_with_fixedsasids.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2009\Roslindale 2009 Gr K Formatted Matched.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2010\Roslindale 2010 Gr K Formatted Matched Fixed.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2011\Roslindale 2011 Gr K Formatted Matched FIXED_with_fixedsasids.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2012\Roslindale Spring 2012 Gr K Formatted Matched FIXED_with_fixedsasids.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2013\Roslindale Spring 2013 Gr K Formatted Matched.txt"
		"$charter\edward brooke I_roslindale\formatted\Spring 2014\Roslindale 2014 Gr K Formatted Matched FIXED_with_fixedsasids.txt"
		
		"$charter\edward brooke II_mattapan\formatted\Spring 2011\Brooke2 2011 Gr K Formatted Matched_with_fixedsasids.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2011\Brooke2 2011 Gr 1 Formatted Matched.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2012\Brooke2 2012 Gr K Formatted Matched FIXED.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2013\Brooke2 2013 Gr K Formatted Matched_with_fixedsasids.txt"
		"$charter\edward brooke II_mattapan\formatted\Spring 2014\Brooke2 2014 Gr K Formatted Matched FIXED_with_fixedsasids.txt"

		"$charter\edward brooke III_east boston\formatted\Spring 2012\Brooke3 2012 Gr K Formatted Matched.txt"
		"$charter\edward brooke III_east boston\formatted\Spring 2012\Brooke3 2012 Gr 1 Formatted Matched FIXED.txt"
		"$charter\edward brooke III_east boston\formatted\Spring 2013\Brooke3 2013 Gr K Formatted Matched FIXED.txt"
		"$charter\edward brooke III_east boston\formatted\Spring 2014\Brooke3 2014 Gr K Formatted Matched FIXED.txt"		

		"$charter\match es\formatted\Spring 2011\Match ES 2011 PreK Formatted Matched.txt"
		"$charter\match es\formatted\Spring 2011\Match ES 2011 Gr 2 Formatted Matched Fixed.txt"
		"$charter\match es\formatted\Spring 2012\Match ES 2012 Pre K Formatted Matched Fixed.txt"
		"$charter\match es\formatted\Spring 2012\Match ES 2012 Gr 2 Formatted Matched.txt"
		"$charter\match es\formatted\Spring 2013\Match ES 2013 Pre K Formatted Matched.txt"
		"$charter\match es\formatted\Spring 2013\Match ES 2013 Gr 2 Formatted Matched Fixed_with_fixedsasids.txt" 
		
		"$charter\neighborhood_house\formatted\Spring 2004\Neighborhood House 2004 Pre K Formatted Matched.txt"
		"$charter\neighborhood_house\formatted\Spring 2005\Neighborhood House 2005 Pre K Formatted Matched.txt"
		"$charter\neighborhood_house\formatted\Spring 2006\Neighborhood House 2006 Pre K Formatted Matched.txt"
		"$charter\neighborhood_house\formatted\Spring 2007\Neighborhood House 2007 Pre K Formatted Matched.txt"
		"$charter\neighborhood_house\formatted\Spring 2008\Neighborhood House 2008 Pre K Formatted Matched Fixed.txt"				
		"$charter\neighborhood_house\formatted\Spring 2009\Neighborhood House 2009 Pre K Formatted Matched Fixed.txt"
		"$charter\neighborhood_house\formatted\Spring 2010\Neighborhood House 2010 Pre K Formatted Matched FIXED_with_fixedsasids.txt"	
		"$charter\neighborhood_house\formatted\Spring 2011\Neighborhood House 2011 Pre K Formatted Matched Fixed.txt"
		"$charter\neighborhood_house\formatted\Spring 2012\Neighborhood House 2012 Pre K Formatted Matched FIXED.txt"
		"$charter\neighborhood_house\formatted\Spring 2013\Neighborhood House 2013 Pre K Formatted Matched.txt"		
		
		"$charter\conservatorylab\formatted\Spring 2009\Conservatory Lab Spring 2009 Pre K Formatted Matched Fixed.txt"
		"$charter\conservatorylab\formatted\Spring 2009\Conservatory Lab Spring 2009 Gr K Formatted Matched Fixed.txt"
		"$charter\conservatorylab\formatted\Spring 2010\Conservatory Lab Spring 2010 Pre K Formatted Matched.txt"
		"$charter\conservatorylab\formatted\Spring 2011\Conservatory Lab Spring 2011 Pre K Formatted Matched.txt"
		"$charter\conservatorylab\formatted\Spring 2012\Conservatory Lab Spring 2012 Pre K Formatted Matched.txt"
		"$charter\conservatorylab\formatted\Spring 2013\Conservatory Lab Spring 2013 Pre K Formatted Matched.txt"
		"$charter\conservatorylab\formatted\Spring 2014\Conservatory Lab Spring 2014 Gr 2 Formatted Matched FIXED.txt"
		"$charter\conservatorylab\formatted\Spring 2014\Conservatory Lab Spring 2014 Gr 4 Formatted Matched FIXED.txt"		
		
		"$charter\bridgeboston\formatted\Spring 2011\Bridge Boston Spring 2011 Gr K Formatted Matched.txt"
		"$charter\bridgeboston\formatted\Spring 2011\Bridge Boston Spring 2011 Pre K Formatted Matched.txt"
		"$charter\bridgeboston\formatted\Spring 2012\Bridge Boston Spring 2012 Pre K Formatted Matched.txt"
		"$charter\bridgeboston\formatted\Spring 2013\Bridge Boston Spring 2013 Pre K Formatted Matched.txt"
		
		"$charter\kipp boston\formatted\Spring 2014\KIPPAcademyBoston Spring 2014 Gr K Formatted Matched FIXED_with_fixedsasids.txt"
		
		"$charter\codman\formatted\Spring 2013 ES\Codman Spring 2013 Gr K Formatted Matched Fixed.txt"
		"$charter\codman\formatted\Spring 2013 ES\Codman Spring 2013 Pre K Formatted Matched.txt"
		"$charter\codman\formatted\Spring 2014 ES\Codman Spring 2014 Gr 2 Formatted Matched.txt"	
		
	"$charter/cape cod lighthouse/formatted/cape cod spring 2007 matched.txt"  
	"$charter/cape cod lighthouse/formatted/cape cod spring 2008 matched.txt"
	"$charter/cape cod lighthouse/formatted/Cape Cod Spring 2009 Lottery matched.txt" 
	"$charter/cape cod lighthouse/formatted/Cape_Cod_2010_Matched.txt" 

	"$charter/four rivers/formatted/Four Rivers Spring 2003 Lottery - matched.txt"  
	"$charter/four rivers/formatted/Four Rivers Spring 2004 Lottery - matched.txt"  
	"$charter/four rivers/formatted/Four Rivers Spring 2005 Lottery - matched.txt"  
	"$charter/four rivers/formatted/Four Rivers Spring 2006 Lottery - matched.txt"  
	"$charter/four rivers/formatted/Four Rivers Spring 2007 Lottery - matched.txt"  
	"$charter/four rivers/formatted/Four Rivers Spring 2008 Lottery - matched.txt"  
	"$charter/four rivers/formatted/Four Rivers Spring 2009 7th Grade Lottery matched.txt" 
	"$charter/four rivers/formatted/FourRivers_2010_Matched.txt" 
	"$charter/four rivers/formatted/2011-2012/FourRiversSpring2011_Matched.txt" 
	"$charter/four rivers/formatted/Four Rivers Spring 2012 Lottery_Matched.txt" 
	
	"$charter/francis w parker/formatted/Francis Parker Essential School Spring 2006 Lottery - matched.txt"  
	"$charter/francis w parker/formatted/Francis Parker Essential School Spring 2007 Lottery - matched.txt"  
	"$charter/francis w parker/formatted/Francis Parker Essential School Spring 2008 Lottery - matched.txt"  
	"$charter/francis w parker/formatted/Francis Parker Essential School Spring 2009 matched.txt"
	"$charter/francis w parker/formatted/Francis_Parker_2010_Matched.txt"
	"$charter/francis w parker/formatted/2011 Lottery - 2019 Update/Francis Parker Essential School Spring 2011 7th Grade Lottery Matched.txt"
	
	"$charter/global learning/formatted/Global Learning Spring 2006 Lottery - matched.txt"  
	"$charter/global learning/formatted/Global Learning Spring 2007 Lottery - matched.txt"  
	"$charter/global learning/formatted/Global Learning Spring 2009 Grade 5 Lottery matched.txt"
	
	"$charter/innovation/formatted/Innovation Academy Spring 2007 Lottery - matched.txt"  
	"$charter/innovation/formatted/Innovation Academy Spring 2008 Lottery - matched.txt"  
	"$charter/innovation/formatted/Innovation Academy Spring 2009 Grade 5 Lottery matched.txt"
	"$charter/innovation/formatted/Innovation_2010_Matched.txt"
	"$charter/innovation/formatted/2011-2012/Innovation Formatted Matched.txt"	

	"$charter/kipp lynn/formatted/kipp_gradeappfixed.csv"
	"$charter/kipp lynn/formatted/KIPP Lynn Spring 2009 Grade 5 Lottery matchednew.txt"
	"$charter/kipp lynn/formatted/2011/KIPP Lynn Spring 2011 Grade 5 Lottery Matched.txt"
	
	"$charter/marblehead community charter public school/formatted/Marblehead Community Charter Spring 2005 Lottery - matched.txt"  
	"$charter/marblehead community charter public school/formatted/Marblehead Community Charter Spring 2006 Lottery - matched.txt"  
	"$charter/marblehead community charter public school/formatted/Marblehead Community Charter Spring 2007 Lottery - matched.txt" 
	"$charter/marblehead community charter public school/formatted/Marblehead Spring 2009 4th Grade Lottery matched.txt"
	"$charter/marblehead community charter public school/formatted/Marblehead_2010_Matched.txt"
	
	"$charter/pioneer valley performing arts/formatted/Pioneer Valley Performing Arts Spring 2006 Lottery - matched.txt"  
	"$charter/pioneer valley performing arts/formatted/Pioneer Valley Performing Arts Spring 2007 Lottery - matched.txt" 	 
	"$charter/pioneer valley performing arts/formatted/Pioneer Valley Performing Arts Spring 2008 Lottery - matched.txt"
	"$charter/pioneer valley performing arts/formatted/Pioneer Valley Performing Arts Spring 2009 7th Grade Lottery matchednew.txt"
	"$charter/pioneer valley performing arts/formatted/Pioneer Valley Performing Arts Spring 2010 7th Grade Lottery Matched.txt"
	
	"$charter/rising tide/formatted/Rising Tide Spring 2009 5th Grade Lottery matched.txt"
	"$charter/rising tide/formatted/Rising Tide Spring 2010 5th Grade Lottery Matched.txt"
	
	"$charter/salem academy charter/formatted/Salem_Academy_2010_Matched.txt"		
	"$charter\salem academy charter\formatted\2011-2012\SAC 2011 Formatted_Revised 4.11.19 Matched.txt"
	
	"$charter/sturgis charter/formatted/Sturgis Spring 2004 Lottery - matched.txt"
	"$charter/sturgis charter/formatted/Sturgis Spring 2006 Lottery - matched.txt"
	"$charter/sturgis charter/formatted/Sturgis Spring 2008 9th Grade Lottery matched.txt"
	"$charter/sturgis charter/formatted/Sturgis_2009_Matched.txt"	
	"$charter/sturgis charter/formatted/Sturgis_2010_Matched.txt"	
	"$charter/sturgis charter/formatted/2011-2012/Sturgis_2011_Matched.txt"	
	
	"' ;
	
#delimit  cr

//Status on nonurban files 
* Cape Cod Lighthouse ends in 2010 (6th grade entrance)
* Four Rivers 2011 added -- Sarah formatted and matched
* Francis Parker 2011 added -- Sarah checked and rematched
* Global Learning -- cannot add w/out paper files, Sarah checked risk sets
* Innovation 2011 added -- Sarah checked and rematched
* KIPP Lynn 2011 added -- Sarah checked and rematched; students may not be old enough yet
* Marblehead 2011 and 2012 possible to add but cohorts too young -- (start in 4th grade)
* PVPA -- cleaned up 2010
* Rising Tide -- cleaned up 2010
* Added Salem Academy 2011 (match rate above 80% but could maybe use some more work)
* Added Sturgis 2011 -- cleanedup and rematched
	
*Hampden

*CW 1 27 2012:  ADD NEW SHORTNAMES FOR NEW LOTTOS
*MAKE SURE SHORTNAMES AND PATHS MATCH UP
* One short name per entry of pathnames, in the same order: the import loop pairs them by position.
* Each name becomes the stem of the saved .dta files and the name of an indicator variable (see inputcharter).
* NOTE(review): Marble2008 and Marble2009 line up with the Marblehead Spring 2009 and 2010 files, and
* E_BridgeK2_2012 / E_BridgeK2_2013 line up with Bridge Boston Pre K files -- confirm these labels are intended.
local shortnames  ///
	APR2005_6th APR2006_6th APR2007_6th APR2007_5th APR2008_5th APR2009_5th APR2010_5th APR2011_5th APR2012_5th APR2013_5th ///
	BosCol2007 BosCol2008 BosCol2009 BosCol2010 BosCol2011 BosCol2012 BosCol2013 BosCol2014 ///
	BGA2011 BGA2012 BGA2013 BGA2014 ///
	BosPrep2008 BosPrep2009 BosPrep2010 BosPrep2011 BosPrep2012 BosPrep2013 BosPrep2014 ///
	BosPrep_9_2014 ///
	CoaH2007 CoaH2008 CoaH2009 Coah2010 Coah2011 Coah2012 Coah2013 Coah2014 ///
	CoaHII2013 CoaHII2014 ///
	Codman2008 Codman2009 Codman2010 Codman2011 Codman2012 Codman2013 Codman2014 ///	
	Codman_5_2014 Codman_6_2014 ///
	DCA2012 ///
	Brooke2007 Brooke2008 Brooke2009 ///
	BrookeII2011 BrookeII2012 BrookeII2013 BrookeII2014 ///
	BrookeIII2012 BrookeIII2013 BrookeIII2014 ///
	Excel2008 Excel2009 Excel2010 Excel2011 Excel2012 Excel2013 ///
	ExcelIII2012 ExcelIII2013 ///
	GroveHall2011 ///
	KIPP2012 KIPP2013 KIPP2014 ///
	MatchHS2007 MatchHS2008 MatchHS2009 MatchHS2010 ///
	MatchMS2008 MatchMS2009 MatchMS2010 MatchMS2011 MatchMS2012 MatchMS2013 MatchMS2014 ///
	RoxPrep2006 RoxPrep2007 RoxPrep2008 RoxPrep2009 RoxPrep2010 RoxPrep2011 ///
	UncommonSchools2012 UncommonSchools2013 UncommonSchools2014 ///
	UP2011 UP2012 UP2013 UP2014 ///
	E_BrookeK2_2007 E_BrookeK2_2008 E_BrookeK2_2009 E_BrookeK2_2010 E_BrookeK2_2011 E_BrookeK2_2012 E_BrookeK2_2013 E_BrookeK2_2014 ///
	E_BrookeIIK2_2011 E_BrookeII1_2011 E_BrookeIIK2_2012 E_BrookeIIK2_2013 E_BrookeIIK2_2014 ///
	E_BrookeIIIK2_2012 E_BrookeIII1_2012 E_BrookeIIIK2_2013 E_BrookeIIIK2_2014  ///
	E_MatchK1_2011 E_Match2_2011 E_MatchK1_2012 E_Match2_2012 E_MatchK1_2013 E_Match2_2013  ///
	E_NHK1_2004 E_NHK1_2005 E_NHK1_2006 E_NHK1_2007 E_NHK1_2008 E_NHK1_2009 E_NHK1_2010 E_NHK1_2011 E_NHK1_2012 E_NHK1_2013 ///
	E_ConservK1_2009 E_ConservK2_2009 E_ConservK1_2010 E_ConservK1_2011 E_ConservK1_2012 E_ConservK1_2013 E_Conserv2_2014 E_Conserv4_2014  ///
	E_BridgeK2_2011 E_BridgeK1_2011 E_BridgeK2_2012 E_BridgeK2_2013  ///
	E_KIPPK2_2014 ///
	E_CodmanK2_2013 E_CodmanK1_2013 E_Codman2_2014 ///
	CapeCod2007 CapeCod2008 CapeCod2009 CapeCod2010 ///
	FourRiv2003 FourRiv2004 FourRiv2005 FourRiv2006  FourRiv2007 FourRiv2008 FourRiv2009 FourRiv2010 FourRiv2011 FourRiv2012 ///
	Parker2006 Parker2007 Parker2008 Parker2009 Parker2010 Parker2011 ///
	Global2006 Global2007 Global2009 ///
	Innov2007 Innov2008 Innov2009 Innov2010 Innov2011 ///
	KIPP KIPP2009 KIPP2011 /// KIPP Lynn
	Marble2005   Marble2006 Marble2007 Marble2008 Marble2009 ///
	PVPA2006 PVPA2007 PVPA2008 PVPA2009 PVPA2010 ///
	RisingTide2009 RisingTide2010 ///
	SalemAc2010 SalemAc2011 ///
	Sturgis2004 Sturgis2006 Sturgis2008 Sturgis2009 Sturgis2010 Sturgis2011
	
* Import every lottery file. Paths and short names are paired purely by position, so the two
* lists must be the same length; stop before importing anything if they are not.
local n_paths : list sizeof global(pathnames)
local n_short : list sizeof shortnames
if `n_paths' != `n_short' {
	di as error "pathnames has `n_paths' entries but shortnames has `n_short'; the lists must match one to one"
	error 198
}

* start the position counter explicitly so the loop does not depend on an earlier reset
local shortindex 0
foreach p of global pathnames {
	local ++shortindex
	local item : word `shortindex' of `shortnames'
	* disp "Short=`item', long=`p'"
	inputcharter, path(`p') short(`item')
	}



*CW 1 27 2012:  ADD NEW LABELS TO THIS LIST
*Here, add all shortnames EXCEPT FOR THE FIRST ONE APR2005_6th
* Files appended onto APR2005_6th, which is loaded first by the use command that follows:
* every short name except that first one, in the same order as shortnames.
* Keep this list in step with shortnames when adding a lottery.
local appendnames  ///
	/* APR2005_6th*/  APR2006_6th APR2007_6th APR2007_5th APR2008_5th APR2009_5th APR2010_5th APR2011_5th APR2012_5th APR2013_5th ///
	BosCol2007 BosCol2008 BosCol2009 BosCol2010 BosCol2011 BosCol2012 BosCol2013 BosCol2014 ///
	BGA2011 BGA2012 BGA2013 BGA2014 ///
	BosPrep2008 BosPrep2009 BosPrep2010 BosPrep2011 BosPrep2012 BosPrep2013 BosPrep2014 ///
	BosPrep_9_2014 ///
	CoaH2007 CoaH2008 CoaH2009 Coah2010 Coah2011 Coah2012 Coah2013 Coah2014 ///
	CoaHII2013 CoaHII2014 ///
	Codman2008 Codman2009 Codman2010 Codman2011 Codman2012 Codman2013 Codman2014 ///	
	Codman_5_2014 Codman_6_2014 ///
	DCA2012 ///
	Brooke2007 Brooke2008 Brooke2009 ///
	BrookeII2011 BrookeII2012 BrookeII2013 BrookeII2014 ///
	BrookeIII2012 BrookeIII2013 BrookeIII2014 ///
	Excel2008 Excel2009 Excel2010 Excel2011 Excel2012 Excel2013 ///
	ExcelIII2012 ExcelIII2013 ///
	GroveHall2011 ///
	KIPP2012 KIPP2013 KIPP2014 ///
	MatchHS2007 MatchHS2008 MatchHS2009 MatchHS2010 ///
	MatchMS2008 MatchMS2009 MatchMS2010 MatchMS2011 MatchMS2012 MatchMS2013 MatchMS2014 ///
	RoxPrep2006 RoxPrep2007 RoxPrep2008 RoxPrep2009 RoxPrep2010 RoxPrep2011 ///
	UncommonSchools2012 UncommonSchools2013 UncommonSchools2014 ///
	UP2011 UP2012 UP2013 UP2014 ///
	E_BrookeK2_2007 E_BrookeK2_2008 E_BrookeK2_2009 E_BrookeK2_2010 E_BrookeK2_2011 E_BrookeK2_2012 E_BrookeK2_2013 E_BrookeK2_2014 ///
	E_BrookeIIK2_2011 E_BrookeII1_2011 E_BrookeIIK2_2012 E_BrookeIIK2_2013 E_BrookeIIK2_2014 ///
	E_BrookeIIIK2_2012 E_BrookeIII1_2012 E_BrookeIIIK2_2013 E_BrookeIIIK2_2014  ///
	E_MatchK1_2011 E_Match2_2011 E_MatchK1_2012 E_Match2_2012 E_MatchK1_2013 E_Match2_2013  ///
	E_NHK1_2004 E_NHK1_2005 E_NHK1_2006 E_NHK1_2007 E_NHK1_2008 E_NHK1_2009 E_NHK1_2010 E_NHK1_2011 E_NHK1_2012 E_NHK1_2013 ///
	E_ConservK1_2009 E_ConservK2_2009 E_ConservK1_2010 E_ConservK1_2011 E_ConservK1_2012 E_ConservK1_2013 E_Conserv2_2014 E_Conserv4_2014  ///
	E_BridgeK2_2011 E_BridgeK1_2011 E_BridgeK2_2012 E_BridgeK2_2013  ///
	E_KIPPK2_2014 ///
	E_CodmanK2_2013 E_CodmanK1_2013 E_Codman2_2014 ///
	CapeCod2007 CapeCod2008 CapeCod2009 CapeCod2010 ///
	FourRiv2003 FourRiv2004 FourRiv2005 FourRiv2006  FourRiv2007 FourRiv2008 FourRiv2009 FourRiv2010 FourRiv2011 FourRiv2012 ///
	Parker2006 Parker2007 Parker2008 Parker2009 Parker2010 Parker2011 ///
	Global2006 Global2007 Global2009 ///
	Innov2007 Innov2008 Innov2009 Innov2010 Innov2011 ///
	KIPP KIPP2009 KIPP2011 /// KIPP Lynn
	Marble2005   Marble2006 Marble2007 Marble2008 Marble2009 ///
	PVPA2006 PVPA2007 PVPA2008 PVPA2009 PVPA2010 ///
	RisingTide2009 RisingTide2010 ///
	SalemAc2010 SalemAc2011 /// 2011 available but match rate low
	Sturgis2004 Sturgis2006 Sturgis2008 Sturgis2009 Sturgis2010 Sturgis2011
		
* Start from the first cohort file, then stack every file listed in the
* appendnames local on top of it, announcing each file before it is appended.
use APR2005_6th, clear
foreach fname of local appendnames {
	display "Going to append `fname' next"
	append using `fname', force // force was added for sturgis2010
}

* Discard every variable whose name begins with "v".
drop v*

*This section: 
	*	- codes offers for lottery cutoffs
	
* Impute offers from lottery-number cutoffs for cohorts whose files lack them.
* Pattern used for every school-year below:
*   1. copy lotterynumber for that school-year into a scratch variable
*   2. destring it with the force option, so non-numeric entries become missing
*   3. code the offer 1 for numbers from 1 through the cutoff and 0 above it
*   4. drop the scratch variable
* Stata ranks missing above every number, so a row whose lottery number is
* missing or non-numeric satisfies "> cutoff" and is coded 0.
* NOTE(review): the first two sections compare school to "MATCH_HS" and "COAH",
* which are the labels produced by the school-name recode further down, while the
* other sections use raw names -- confirm the raw files already carry those labels.

*D.SUN: impute initial offer for the Match 2008 cohort (cutoff 65)
gen lotnum_match08 = lotterynumber if school == "MATCH_HS" & year==2008
destring lotnum_match08, replace force
replace initial_offer = 1 if school=="MATCH_HS" & year==2008 & (lotnum_match08 <= 65 & lotnum_match08 >= 1 & lotnum_match08 ~=.)
replace initial_offer = 0 if school=="MATCH_HS" & year==2008 & (lotnum_match08 > 65)
drop lotnum_match08

*D.SUN: impute CoaH 2009 initial offer by using the 2008 initial offer (cutoff 175)
gen lotnum_coah09 = lotterynumber if school == "COAH" & year==2009
destring lotnum_coah09, replace force
replace initial_offer = 1 if school=="COAH" & year==2009 & (lotnum_coah09 <= 175 & lotnum_coah09 >=1 & lotnum_coah09 ~=.)
replace initial_offer = 0 if school=="COAH" & year==2009 & (lotnum_coah09 > 175)
drop lotnum_coah09

*D.Sun 9/12/2013: impute Match MS 2009 initial offer using the 2008 initial offer (lottery number 93)
gen lotnum_matchms09 = lotterynumber if school == "MATCH MS" & year==2009
destring lotnum_matchms09, replace force
replace initial_offer = 1 if school == "MATCH MS" & year==2009 & (lotnum_matchms09 <= 93 & lotnum_matchms09 >=1 & lotnum_matchms09 ~=.)
replace initial_offer = 0 if school == "MATCH MS" & year==2009 & (lotnum_matchms09 > 93)
drop lotnum_matchms09

*D.Sun 9/12/2013: impute Roxbury Prep initial offer with the 2008 initial offer (lottery number 110)
* Only the 2009 cohort is coded here.
gen lotnum_roxprep09 = lotterynumber if school == "Roxbury Prep" & year==2009
destring lotnum_roxprep09, replace force
replace initial_offer = 1 if school == "Roxbury Prep" & year==2009 & (lotnum_roxprep09 <= 110 & lotnum_roxprep09 >=1 & lotnum_roxprep09 ~=.)
replace initial_offer = 0 if school == "Roxbury Prep" & year==2009 & (lotnum_roxprep09 > 110)
drop lotnum_roxprep09

*D.Sun 9/14/2013: impute Boston Collegiate 2012 ever offer (cutoff 90)
* force added for consistency with the sections above: without it a single
* non-numeric lottery number leaves the scratch variable as a string and the
* numeric comparisons below stop with a type mismatch.
gen lotnum_boscol12 = lotterynumber if school=="Boston Collegiate" & year==2012
destring lotnum_boscol12, replace force
replace offer=1 if school=="Boston Collegiate" & year==2012 & (lotnum_boscol12 <= 90 & lotnum_boscol12 >=1 & lotnum_boscol12 ~=.)
replace offer = 0 if school == "Boston Collegiate" & year==2012 & (lotnum_boscol12 > 90)
drop lotnum_boscol12


***********SCHOOLS WITH ADDITIONAL RISK SETS***************

* EMS added 8/8/2013. The new schools have additional variables - for now we are not looking at these, but might want to add them in later
	* 1st, 2nd, and 3rd choices, and offers for Uncommon schools applicants

/* Risk sets within schools (EMS 8-19-2013) */
 * prioritygroupnumber (used by BGA and UPAcademy) becomes the numeric
 * variable applyprioritygroup.
 destring prioritygroupnumber, replace
 rename prioritygroupnumber applyprioritygroup

 * Second-lottery applicants go into group 2:
 *   offerredadmission2ndlotteryever marks the EdBrooke3 2012 second lottery,
 *   secondlottery marks the Excel I 2009 second lottery (no one got offers).
 replace applyprioritygroup = 2 if offerredadmission2ndlotteryever=="1" | secondlottery=="1"

 * Everyone without a group is assigned group 0.
 replace applyprioritygroup = 0 if applyprioritygroup==.

 drop offerredadmission2ndlotteryever secondlottery
 
 * UCS 2014: the 2014 rows carry their offers in the ...ls, ...mh and ...dp
 * variables; copy the non-missing values into the updated...gh, updated...roxp
 * and updated...dp variables, then drop the 2014-only columns.
	foreach kind in initialoffer everoffer {
		destring updated`kind'*, replace
		replace updated`kind'gh   = `kind'ls if year==2014 & `kind'ls!=.
		replace updated`kind'roxp = `kind'mh if year==2014 & `kind'mh!=.
		replace updated`kind'dp   = `kind'dp if year==2014 & `kind'dp!=.

		drop `kind'ls `kind'mh `kind'dp
	}

 * UCS 2012, 2013 and 2014: a nonzero, non-missing campus-level value
 * overwrites the overall initial and ever offer.
	foreach campus in roxp gh dp {
		replace initial_offer = updatedinitialoffer`campus' if !inlist(updatedinitialoffer`campus', ., 0)
		replace offer = updatedeveroffer`campus' if !inlist(updatedeveroffer`campus', ., 0)
	}

 * Keep the campus-level offers under their final names, then drop the sources.
	gen initial_offerRoxPrep_corr = updatedinitialofferroxp
	gen offerRoxPrep_corr = updatedeverofferroxp
	gen initial_offerGroveH_corr = updatedinitialoffergh
	gen offerGroveH_corr = updatedeveroffergh
	gen initial_offerDP = updatedinitialofferdp
	gen offerDP = updatedeverofferdp
	drop updatedinitialofferroxp updatedeverofferroxp ///
		updatedinitialoffergh updatedeveroffergh ///
		updatedinitialofferdp updatedeverofferdp

 * Any-campus offer indicator: 1 if any campus offer equals 1, 0 for the
 * remaining rows whose school is "UncommonSchools", missing otherwise.
 * NOTE(review): school is compared to "UncommonSchools" here, but the recode of
 * "Uncommon Schools" to that label happens further down -- confirm which spelling
 * the data carry at this point.
	foreach kind in initial_offer offer {
		gen `kind'Uncommon = 1 if inlist(1, `kind'RoxPrep_corr, `kind'GroveH_corr, `kind'DP)
		replace `kind'Uncommon = 0 if `kind'Uncommon==. & school=="UncommonSchools"
	}

**********SCHOOLS WITH ADDITIONAL RISK SETS***************
***Global learning has multiple lotteries, but cutoffs for initial within each lottery and ever cutoff in 2nd lottery

*CW 1 27 2012:  CHECK FOR ADDITIONAL RISK SETS
* Global Learning risk-set indicators, one per year and lottery batch:
*   risk_GL<year>_<1st|2nd|3rd>lotto = 1 for Global Learning applicants of that
*   year whose lotterybatch equals the batch number, 0 for every other row.
* 2006 has two batches; 2007 and 2009 have three (2009: nobody in the 2nd or
* 3rd lottery was offered).
* Fix: the 2009 second-lottery rows used to be written into risk_GL2007_2ndlotto,
* which left risk_GL2009_2ndlotto at 0 and contaminated the 2007 indicator.
foreach yr in 2006 2007 2009 {
	local nbatch = cond(`yr'==2006, 2, 3)
	forvalues b = 1/`nbatch' {
		local ord : word `b' of 1st 2nd 3rd
		gen risk_GL`yr'_`ord'lotto = 0
		replace risk_GL`yr'_`ord'lotto = 1 if school=="Global Learning" & year==`yr' & lotterybatch==`b'
	}
}
cap drop lotterybatch


***Innovation has "separate" lotteries for "Local (chelmsford" and "out of town" -- but out of towners do get in
* For each year 2007-2011:
*   risk_IA<year>_local     = 1 for Innovation Academy applicants with outofarea==0
*   risk_IA<year>_outoftown = 1 for Innovation Academy applicants with outofarea==1
* Both are 0 for every other row. outofarea is then blanked for that
* school-year.
forvalues yr = 2007/2011 {
	gen risk_IA`yr'_local = (outofarea==0 & school=="Innovation Academy" & year==`yr')
	gen risk_IA`yr'_outoftown = (outofarea==1 & school=="Innovation Academy" & year==`yr')
	replace outofarea=. if school=="Innovation Academy" & year==`yr'
}


***Marblehead has "separate" lotteries for "Local (Marblehead)" and "out of town" -- but out of towners do get in
* For each year 2005-2009:
*   risk_Marble<year>_local     = 1 for Marblehead Community MS applicants with outofarea==0
*   risk_Marble<year>_outoftown = 1 for Marblehead Community MS applicants with outofarea==1
* Both are 0 for every other row. outofarea is then blanked for that
* school-year.
forvalues yr = 2005/2009 {
	gen risk_Marble`yr'_local = (outofarea==0 & school=="Marblehead Community MS" & year==`yr')
	gen risk_Marble`yr'_outoftown = (outofarea==1 & school=="Marblehead Community MS" & year==`yr')
	replace outofarea=. if school=="Marblehead Community MS" & year==`yr'
}

*CW 1 27 2012:  CHECK SCHOOL NAMES FOR NEW COHORTS
* Collapse every spelling of a school name found in the raw files into one
* short label. Each line lists all spellings that map to the label.

* --- first group (these are the labels flagged as Boston lotteries below) ---
replace school="APR"             if inlist(school, "Academy of the Pacific Rim", "ACADEMY OF THE PACIFIC RIM")
replace school="BosCol"          if inlist(school, "Boston Collegiate", "Boston Collegiate Charter School", "Boston Collegiate MS", "BCCS", "BOSTON COLLEGIATE")
replace school="BGA"             if inlist(school, "Boston Green Academy", "BOSTON GREEN ACADEMY", "BGA")
replace school="BosPrep"         if inlist(school, "Boston Prep MS", "Boston Prep", "Boston Preparatory Charter Public School")
replace school="COAH"            if inlist(school, "City on a Hill", "CITY ON A HILL", "City on a Hill I")
replace school="COAHII"          if inlist(school, "City on a Hill II", "City on a Hill Charter School Dudley")
replace school="Codman"          if inlist(school, "Codman Academy", "CODMAN ACADEMY", "Codman Charter School", "Codman School")
replace school="DCA"             if school=="Dorchester Collegiate"
replace school="EdBrooke"        if inlist(school, "Edward Brooke", "EDWARD BROOKE ROSLINDALE CHARTER SCHOOL", "EDWARDS BROOKE ROSLINDALE CHARTER SCHOOL", "Edward Brooke Roslindale Charter School")
replace school="EdBrooke2"       if inlist(school, "EDWARD BROOKE 2 CHARTER SCHOOL", "Edward Brooke 2 Charter School")
replace school="EdBrooke3"       if inlist(school, "BROOKE EAST BOSTON", "Edward Brooke III", "Edward Brooke III (East Boston)")
replace school="Excel"           if inlist(school, "Excel Academy", "EXCEL ACADEMY", "Excel East Boston", "Excel I East Boston", "Excel Academy I (East Boston)")
replace school="Excel3"          if inlist(school, "Excel III Oriental Heights", "Excel Academy III (Orient Heights)")
replace school="GroveH"          if school=="GROVE HALL PREP"
replace school="KIPP_BOS"        if school=="KIPP Academy Boston"
replace school="MATCH_HS"        if inlist(school, "MATCH High School", "MATCH HS", "Match HS", "MATCH HIGH SCHOOL")
replace school="MATCH_MS"        if inlist(school, "MATCH Middle School", "MATCH MS", "Match MS", "MATCH MIDDLE SCHOOL", "Match Middle School")
replace school="RoxPrep"         if inlist(school, "Roxbury Preparatory", "Roxbury Prep", "ROXBURY PREP CHARTER SCHOOL")
replace school="UncommonSchools" if school=="Uncommon Schools"
replace school="UPAcademy"       if inlist(school, "UP Academy", "UP ACADEMY", "UP Academy Boston")

replace school="BridgeB"         if school=="Bridge Boston Charter School"
replace school="Conserv"         if school=="Conservatory Lab Charter School"
replace school="MATCH_ES"        if inlist(school, "March Community Day School", "Match Community Day School")
replace school="NHCS"            if school=="Neighborhood House Charter School"

* --- remaining schools ---
* "Cape Cod Lighthouse" and "Cape Cod" both end up as "CapeCod".
replace school="CapeCod"         if inlist(school, "Cape Cod Lighthouse", "Cape Cod")
replace school="FourRiv"         if inlist(school, "Four Rivers Charter", "Four Rivers", "FOUR RIVERS CHARTER SCHOOL")
replace school="Parker"          if inlist(school, "Francis Parker Essential School", "Francis Parker", "FRANCIS W PARKER CHARTER ESSENTIAL")
replace school="Innovation"      if inlist(school, "Innovation Academy", "INNOVATION ACADEMY")
* A bare "KIPP" is treated as KIPP Lynn.
replace school="KIPP_Lynn"       if inlist(school, "KIPP Lynn", "KIPP", "KIPP LYNN MIDDLE SCHOOL")
replace school="Marblehead"      if school=="Marblehead Community MS"
replace school="PVPA"            if inlist(school, "Pioneer Valley Performing Arts", "Pioneer Valley")
replace school="Global"          if inlist(school, "Global Learning", "Global Learning MS")
replace school="RisingTide"      if school=="Rising Tide"
replace school="SalemAc"         if inlist(school, "Salem Academy Charter Middle School", "SALEM ACADEMY CHARTER")
replace school="Sturgis"         if school=="STURGIS CHARTER PUBLIC SCHOOL"

* Eyeball the result for spellings that were not caught.
tab school
 
* boston_lottery = 1 for the school labels listed here, 0 otherwise.
* (inlist takes at most nine strings to compare against, hence three calls.)
gen byte boston_lottery = ///
	  inlist(school, "APR", "BosCol", "BGA", "BosPrep", "COAH", "COAHII", "Codman", "DCA", "EdBrooke") ///
	| inlist(school, "EdBrooke2", "EdBrooke3", "Excel", "Excel3", "GroveH", "KIPP_BOS", "MATCH_HS", "MATCH_MS", "RoxPrep") ///
	| inlist(school, "UncommonSchools", "UPAcademy", "BridgeB", "Conserv", "MATCH_ES", "NHCS")

* Marks every row as coming from the new files.
gen byte from_new=1

* urban_lottery = Boston schools plus Global, KIPP_Lynn and SalemAc.
*add excel 2 here from chelsea SRC
gen urban_lottery = (boston_lottery==1 | inlist(school, "Global", "KIPP_Lynn", "SalemAc"))

* Complements of the two flags above (noturban_lottery added by ChrisP).
gen notboston_lottery = (boston_lottery==0)
gen noturban_lottery = (urban_lottery==0)


	// drop if sasid == 1013995811 & grade==5   
	// duplicates drop will probably need to add this in somewhere else
save "new_BU_lottofiles.dta", replace
}

if "${bostondata}"=="1"{
*read in the old master file
* Fix: -use- accepts -clear-, not -replace-; with -replace- the command stops
* with an "option not allowed" error before any data are loaded.
use  "$charter/Full Charter Lottery Master File June 2010_withcorrections.dta", clear

*get rid of APR and rox prep that are above
drop if school=="Academy of Pacific Rim"
drop if school=="Roxbury Prep" &applicationyear>=2006

* Harmonise names and types with the new files.
rename applicationyear year
rename gradeapplying grade
destring grade, replace force
destring sasid, replace
keep outofboston ndlotto lotterynumber waitlistnumber basedonschool cantmatch offeredadmission sasid siblingstatus school year grade offeredadmissioninitianlly

* Every row in this file is flagged as a Boston lottery.
gen boston_lottery=1

* Late applicants are the rows with an "x" in ndlotto.
gen lateapplicant=0
replace lateapplicant=1 if  ndlotto=="x"

* Any non-empty outofboston entry counts as out of area.
gen outofarea=0
replace outofarea=1 if outofboston~=""
drop outofboston ndlotto

* basedonschool: "YES"/"Yes" become "1"; force turns any other non-numeric text
* into missing. The full variable name is spelled out so the lines do not depend
* on variable abbreviation being enabled.
replace basedonschool="1" if basedonschool=="YES"|basedonschool=="Yes"
destring basedonschool, replace force

* Sibling flags, coded from the free-text siblingstatus field.

* The entry whose lottery number reads "259 (sibling)" is a sibling.
replace siblingstatus="Yes" if lotterynumber=="259 (sibling)"

* sibling = 1 when siblingstatus is one of the spellings listed here
* (inlist takes at most nine strings to compare against, hence two calls).
gen sibling = ///
	  inlist(siblingstatus, "Apllied/Enrolled", "Applied/Enrolled", "Enrolled", "Enrolled in 6th", "Enrolled in 7th grade", "Yes", "applied/enrolled", "applying/Enrolled", "enrolled") ///
	| inlist(siblingstatus, "sibling enrolled", "x", "SIBLING", "y", "YES")

* siblingapplying = 1 when siblingstatus is one of the spellings listed here.
* Trailing blanks inside some of the literals are deliberate: they reproduce the
* raw entries exactly.
gen siblingapplying = ///
	  inlist(siblingstatus, "7th grade applicant", "Applying", "Both labelled 153 ", "applying", "applying for grade 5", "applying for grade 6 ", "applying for grade 6", "applying for grade 6 and grade 7", "applying for grade 7") ///
	| inlist(siblingstatus, "applying for grade 8", "applyinh", "sibling applying")

drop siblingstatus
*------------------------------------------------------------------
* OFFER VARIABLES
*------------------------------------------------------------------
* Hand-fix the lottery-number entries that are not plain numbers.
replace lotterynumber="259" if lotterynumber=="259 (sibling)"
replace lotterynumber="400" if lotterynumber=="383/400 double entry"
replace lotterynumber=" " if lotterynumber=="late application"

* Numeric copies; force turns anything still non-numeric into missing.
destring waitlistnumber, gen(waitnum) force
destring lotterynumber, generate(lotnum) force
label variable waitnum "Numeric Waitlist Number"
label variable lotnum "Numeric Lottery Number"

* Blank and "REALLY IN 6TH" entries count as not offered.
replace offeredadmission="No" if inlist(offeredadmission, "", "REALLY IN 6TH")

*EVER OFFER
tab offeredadmission
gen offer = inlist(offeredadmission, "Yes", "yes", "1")
*Note:  Brooke messed up. All should be offered according to lottery log
replace offer=1 if school=="Edward Brooke" & year==2006
gen initial_offer=.

* Helper used only in this section.
*   lotto_cutoff_offer "school" year low high
* codes initial_offer = 1 for applicants of that school and year whose numeric
* lottery number lies between low and high (inclusive), and 0 for the rest of
* that school-year. Other school-years are left untouched.
cap prog drop lotto_cutoff_offer
prog define lotto_cutoff_offer
	args sch yr lo hi
	replace initial_offer=1 if school=="`sch'" & year==`yr' & lotnum>=`lo' & lotnum<=`hi' & lotnum~=.
	replace initial_offer=0 if initial_offer!=1 & school=="`sch'" & year==`yr'
end

* Boston Collegiate 2002-2006
lotto_cutoff_offer "Boston Collegiate" 2002 1 40
lotto_cutoff_offer "Boston Collegiate" 2003 1 66
lotto_cutoff_offer "Boston Collegiate" 2004 1 66
lotto_cutoff_offer "Boston Collegiate" 2005 1 66
lotto_cutoff_offer "Boston Collegiate" 2006 1 66

* Boston Prep 2005-2007
lotto_cutoff_offer "Boston Prep" 2005 1 98
lotto_cutoff_offer "Boston Prep" 2006 1 80
lotto_cutoff_offer "Boston Prep" 2007 1 100

* Edward Brooke 2006: coded by hand because this rule has no lower bound.
replace initial_offer=1 if school=="Edward Brooke" & year==2006 & lotnum<=39 & lotnum!=.
replace initial_offer=0 if initial_offer!=1 & school=="Edward Brooke" & year==2006

*Edit by Chris 5/24/10:  Add additional MATCH as per e-mail from Julia Manoli April 14 2010 (2005 and 2006)
* MATCH 2002-2006
lotto_cutoff_offer "MATCH" 2002 1 61
lotto_cutoff_offer "MATCH" 2003 1 72
lotto_cutoff_offer "MATCH" 2004 1 70
lotto_cutoff_offer "MATCH" 2005 1 70
lotto_cutoff_offer "MATCH" 2006 1 65

* Health Careers 2003-2006 (the 2005 range starts at 2)
lotto_cutoff_offer "Health Careers" 2003 1 48
lotto_cutoff_offer "Health Careers" 2004 1 88
lotto_cutoff_offer "Health Careers" 2005 2 55
lotto_cutoff_offer "Health Careers" 2006 1 70

* City on a Hill 2002, 2004-2006
lotto_cutoff_offer "City on a Hill" 2002 1 100
lotto_cutoff_offer "City on a Hill" 2004 1 70
lotto_cutoff_offer "City on a Hill" 2005 1 200
lotto_cutoff_offer "City on a Hill" 2006 1 225

* Codman 2004
lotto_cutoff_offer "Codman" 2004 1 41

prog drop lotto_cutoff_offer

* The raw offer, lottery-number and waitlist columns are no longer needed.
drop offered* lot* wait*

* Switch to the short school labels.
replace school="EdBrooke" if school=="Edward Brooke"
replace school="RoxPrep"  if school=="Roxbury Prep"
replace school="MATCH_HS" if school=="MATCH"
replace school="COAH"     if school=="City on a Hill"
replace school="BosPrep"  if school=="Boston Prep"
replace school="BosCol"   if school=="Boston Collegiate"


*As of July 18 as per Josh, drop HCA
* ("Health Careers" spelling added by EMS 8-19-2013)
drop if inlist(school, "HCA", "Health Careers")

format sasid %12.0f
* Marks every row as coming from the old master file.
gen from_old=1

save old_lottofiles.dta, replace

}

*** Stack the new and old lottery files; the matching attempt below works from the result.

use "$save\new_BU_lottofiles.dta", clear
append using "$save\old_lottofiles.dta"
quietly compress

* obs numbers the rows in their current order; the matching steps below merge on it.
generate obs = _n
save "$save/appended.dta", replace

*Final attempt to match any remaining unmatched
if "$match"=="1"{
* Build nosasid.dta: the applicants that still have no sasid, with cleaned,
* upper-cased names and the year shifted forward (see the testyear note below).
use "$save/appended.dta"
keep obs sasid  caplast capfirst year grade mname  
keep if sasid==.
* Names: upper-case and trim, then rename to lastname / firstname.
replace caplast=trim(upper(caplast))
replace capfirst=trim(upper(capfirst))
tostring mname, replace
replace mname=trim(upper(mname))
ren caplast lastname
ren capfirst firstname
* Last names lose a ", JR." suffix, apostrophes and all blanks.
replace lastname=subinstr(lastname,", JR.","",.)
replace lastname=subinstr(lastname,"'","",.)
replace lastname=subinstr(lastname," ","",.)
* All three name fields lose backticks and double quotes.
foreach vars in firstname mname lastname {
	replace `vars' = subinstr(`vars',"`","",.)
	replace `vars' = subinstr(`vars',`"""',"",.)
}
* Shift year forward: one year for grades up to 8, two years for grade 9.
* Rows with any other grade keep their year.
replace year = year+1  if grade<=8
replace year = year+2 if grade==9 //testyear
* schoolyr keeps a copy of the shifted year.
g schoolyr=year

sort obs

* entrygrade keeps a copy of grade.
gen entrygrade = grade

save "$save/nosasid.dta", replace

*Change by CT 5/18/10 - de-duplication has to come after the save above so that "$save/nosasid.dta" preserves all records
* Drop every record whose last name, first name, year and grade are shared with
* another record, so the name key is unique for the 1:1 merge.
bysort lastname firstname year grade: gen dup = _N - 1
drop if dup>=1
drop dup
sort year grade lastname firstname

* Exact match on year, grade and name; records found only in uniquenames.dta are discarded.
merge 1:1 year grade lastname firstname using "${dir}/uniquenames.dta" , keep(1 3)
drop _merge
drop year grade
sort obs
tempfile thisyear
save "`thisyear'"

* Second exact-match attempt: look the applicant up one grade lower, in the year
* before the shift that was applied when nosasid.dta was built.
use "$save/nosasid.dta", clear
* Fix: these shifts used the programming -if- command ( if grade==9 { ... } ),
* which evaluates the condition on the first observation only and then applies
* the replace to every row. The -if- qualifier applies it row by row.
replace year=year-2 if grade==9
replace year=year-1 if grade<=8
replace grade=grade-1

*Change by CT 5/18/10 - Must be repeated to eliminate duplicates
* Drop every record whose year, grade and name are shared with another record,
* so the key is unique for the 1:1 merge.
duplicates tag year grade lastname firstname, gen(dup)
drop if dup>=1
drop dup
sort year grade lastname firstname 

* Exact match on year, grade and name; records found only in uniquenames.dta are discarded.
merge 1:1 year grade lastname firstname using "${dir}/uniquenames.dta" , keep(1 3)
drop _merge
drop year grade
sort obs
tempfile lastyear
save "`lastyear'"

* Third exact-match attempt: last, first and middle name, ignoring year and grade.
use "$save/nosasid.dta", clear
* Keep only records whose three-part name is unique.
bysort lastname firstname mname: gen dup = _N - 1
drop if dup>=1
drop dup
* Records found only in uniquenames_middle.dta are discarded.
merge 1:1 lastname firstname mname  using "${dir}/uniquenames_middle.dta" , keep(1 3)
drop _merge
drop year grade
sort obs
tempfile mname
save "`mname'"

* Bring the three exact-match attempts back onto the full nosasid list, keyed on obs.
use "$save/nosasid.dta", clear
drop if obs==.
* NOTE(review): this first merge has no update option, so for variables present
* in both files the values already in memory are kept; if the matched id is
* carried in sasid, the prior-year matches are not picked up here -- confirm
* whether that is intended.
merge 1:1 obs using "`lastyear'"
drop _merge
drop if obs==.
* With update replace, non-missing values from the same-year file overwrite what is in memory.
merge 1:1 obs using "`thisyear'", update replace
drop _merge
drop if obs==.
* Same for the middle-name file, which is applied last.
merge 1:1 obs using "`mname'", update replace
drop _merge
drop if obs==.
keep  obs lastname firstname mname  entrygrade sasid schoolyr
* From here on the id found by exact matching is called statasasid.
ren sasid statasasid
sort obs
** Save the unique matches file
save "$save\allremaining", replace


/* ---------------------------------------------------------------------------
	Fuzzy Match
   --------------------------------------------------------------------------- */

* Only keep obs w/o matched sasids
drop if statasasid != .


*---------------------------------------------------------------------------
* Step 1: strip hyphens, apostrophes and blanks from first and last names; a
* perfect match after stripping is treated as a match.
*---------------------------------------------------------------------------
* trimmed_formatted = 1 when either name contains one of the three characters
* before stripping, missing otherwise.
gen trimmed_formatted = 1 if regexm(firstname,"-") | regexm(firstname," ") | regexm(firstname,"'") ///
	| regexm(lastname,"-") | regexm(lastname," ") | regexm(lastname,"'")

* Now remove the three characters from both names.
foreach nm in firstname lastname {
	replace `nm' = subinstr(`nm', "-", "",.)
	replace `nm' = subinstr(`nm', " ", "",.)
	replace `nm' = subinstr(`nm', "'", "",.)
}
sort obs
tempfile temp_formatted
save `temp_formatted'


* Same preparation for the names in fuzzymatchnames.dta.
use "${dir}\fuzzymatchnames.dta", clear

* trimmed_SIMS = 1 when either name contains a hyphen, blank or apostrophe
* before stripping, missing otherwise.
gen trimmed_SIMS = 1 if regexm(firstname,"-") | regexm(firstname," ") | regexm(firstname,"'") ///
	| regexm(lastname,"-") | regexm(lastname," ") | regexm(lastname,"'")

* Remove the three characters from both names.
foreach nm in firstname lastname {
	replace `nm' = subinstr(`nm', "-", "",.)
	replace `nm' = subinstr(`nm', " ", "",.)
	replace `nm' = subinstr(`nm', "'", "",.)
}
sort ma_obs
tempfile temp_SIMS
save `temp_SIMS'

* Link the stripped lottery names to the stripped names file.
use `temp_formatted'

reclink lastname firstname using `temp_SIMS', idmaster(obs) idusing(ma_obs) gen(matchqual) required(lastname firstname) orblock(lastname firstname)

* Only pairs where stripping changed at least one side are of interest here.
keep if trimmed_formatted == 1 | trimmed_SIMS == 1

	* exactyr_match = 1 when year and grade are offset from schoolyr and
	* entrygrade by the same number of years: 0 to 7 forward, 1 to 6 back.
	gen exactyr_match = .
	forvalues k = 0/7 {
		replace exactyr_match = 1 if year == schoolyr+`k' & grade == entrygrade + `k'
	}
	forvalues k = 1/6 {
		replace exactyr_match = 1 if year == schoolyr-`k' & grade == entrygrade - `k'
	}

// A pair counts as a match when the year/grade offset is consistent and the
// stripped first and last names agree exactly (matchqual of 1)
keep if exactyr_match == 1 & matchqual == 1
capture duplicates drop obs sasid, force // You will be able to catch if it's not unique by obs and sasid later

* Observations that matched more than one sasid are dropped and left to handmatching/fuzzy matching.
bys obs: gen obs_ct = _N
capture drop if obs_ct > 1
capture drop obs_ct

sort obs
keep obs lastname firstname mname dob sasid
save "${save}\allremaining_fuzzy_compact_a", replace


*---------------------------------------------------------------------------
* Step 2: remove JR and JR. and call it a match when first and last names are
* then identical and the year is right.
* Only names with JR or JR. at the end of the first or last name matter. JRs
* occur in both datasets, so each side is tagged and a match is kept only when
* at least one side carried a JR.
*---------------------------------------------------------------------------
use "${save}\allremaining", clear
sort obs
* Only keep obs w/o matched sasids
drop if statasasid != .


* JR_formatted = 1 when the first or last name ends in JR or JR. (checked
* before anything is removed), missing otherwise.
gen JR_formatted = 1 if substr(firstname,-2,2) == "JR" | substr(firstname,-3,3) == "JR." ///
	| substr(lastname,-2,2) == "JR" | substr(lastname,-3,3) == "JR."

** JR and JR. in first and last name
* NOTE(review): the fourth argument of subinstr is the number of occurrences to
* replace, not a position, and it is computed from lastname for both variables --
* confirm that removing JR and periods anywhere in the name is acceptable.
foreach nm in firstname lastname {
	replace `nm' = subinstr(`nm',"JR","",length(lastname)-2)
	replace `nm' = subinstr(`nm',".","",length(lastname)-1)
}
sort obs
tempfile tempJR_formatted
save `tempJR_formatted'


*** Same JR handling for the names in fuzzymatchnames.dta
	use "${dir}\fuzzymatchnames.dta", clear

	* JR_SIMS = 1 when the first or last name ends in JR or JR. (checked
	* before anything is removed), missing otherwise.
	gen JR_SIMS = 1 if substr(firstname,-2,2) == "JR" | substr(firstname,-3,3) == "JR." ///
		| substr(lastname,-2,2) == "JR" | substr(lastname,-3,3) == "JR."

	** JR and JR. in first and last name
	* NOTE(review): the fourth argument of subinstr is the number of occurrences
	* to replace, not a position -- confirm this is the intended behaviour.
	foreach nm in firstname lastname {
		replace `nm' = subinstr(`nm',"JR","",length(lastname)-2)
		replace `nm' = subinstr(`nm',".","",length(lastname)-1)
	}
	sort ma_obs
	tempfile tempJR_SIMS
	save `tempJR_SIMS'

   use `tempJR_formatted'

* Link the JR-stripped lottery names to the JR-stripped names file.
reclink lastname firstname using `tempJR_SIMS', idmaster(obs) idusing(ma_obs) gen(matchqual) required(lastname firstname) orblock(lastname firstname)

* Only pairs where at least one side carried a JR are of interest here.
keep if JR_formatted == 1 | JR_SIMS == 1

	* exactyr_match = 1 when year and grade are offset from schoolyr and
	* entrygrade by the same number of years: 0 to 7 forward, 1 to 6 back.
	gen exactyr_match = .
	forvalues k = 0/7 {
		replace exactyr_match = 1 if year == schoolyr+`k' & grade == entrygrade + `k'
	}
	forvalues k = 1/6 {
		replace exactyr_match = 1 if year == schoolyr-`k' & grade == entrygrade - `k'
	}

// A pair counts as a match when the year/grade offset is consistent and the
// JR-stripped first and last names agree exactly (matchqual of 1)
keep if exactyr_match == 1 & matchqual == 1
capture duplicates drop obs sasid, force // You will be able to catch if it's not unique by obs and sasid later

* Observations that matched more than one sasid are dropped and left to handmatching/fuzzy matching.
bys obs: gen obs_ct = _N
capture drop if obs_ct > 1
capture drop obs_ct
sort obs
keep obs lastname firstname mname dob sasid
save "${save}\allremaining_fuzzy_compact_b", replace

******************************************************************************************************************
*********** Continue with Fuzzy Matching *************************************************************************
******************************************************************************************************************

use "${save}\allremaining", clear
sort obs
* Restrict to applicants that still have no sasid from the earlier matching rounds
drop if statasasid != .

* Take out applicants already resolved by the two exact-name passes:
*   _a = names matched after the hyphen cleanup, _b = names matched after the JR cleanup
foreach pass in a b {
	merge 1:1 obs using "${save}\allremaining_fuzzy_compact_`pass'"
	drop if _merge == 3
	drop _merge
}

drop sasid statasasid

* Strip left-quote characters from every name field
foreach nm in lastname firstname mname {
	replace `nm' = subinstr(`nm',"`","",.)
}
sort obs

* The unique row id in fuzzymatchnames.dta is ma_obs.
* Earlier specification, kept for reference (also weighted mname and dob):
*reclink lastname firstname mname dob using "${dir}\fuzzymatchnames.dta", idmaster(obs) idusing(ma_obs) gen(matchqual) wmatch(10 10 2 2)
* Current specification links on last and first name only.
reclink lastname firstname using "${dir}\fuzzymatchnames.dta", idmaster(obs) idusing(ma_obs) gen(matchqual) wmatch(10 10)

* Only keep matches with reasonable year ranges.
*   exactyr_match      : SIMS year/grade is exactly on the expected grade progression
*   reasonableyr_match : grade is one above or below the expected progression
*   twoyr_match        : grade is two above or below; only for rows with entrygrade >= 8
* Each flag is 1 when the condition holds and missing otherwise.

	gen exactyr_match = .
	replace exactyr_match = 1 if year == schoolyr & grade == entrygrade
	forvalues i = 1(1)6 {
		replace exactyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i'
	}
	forvalues i = 1(1)7 {
		replace exactyr_match = 1 if year == schoolyr+`i' & grade == entrygrade + `i'
	}

	gen reasonableyr_match = . // If off by one year
		forvalues i = 0(1)6 {
			// Grade is one below the expected progression
			replace reasonableyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i' - 1

			// Grade is one above the expected progression
			replace reasonableyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i' + 1

		}

		forvalues i = 1(1)7 {
			// Grade is one below the expected progression
			replace reasonableyr_match = 1 if year == schoolyr + `i' & grade == entrygrade + `i' - 1

			// Grade is one above the expected progression
			replace reasonableyr_match = 1 if year == schoolyr + `i' & grade == entrygrade + `i' + 1
		}

	* Two-year window, high school entry only.
	* This used to be wrapped in the programming command "if entrygrade>=8 { ... }".
	* The if COMMAND evaluates its expression once, using the first observation,
	* so the whole window was switched on or off for every row depending on whichever
	* row happened to be first. The restriction is now an if QUALIFIER, evaluated per row.
	* Rows with missing entrygrade are excluded explicitly because missing compares
	* as greater than any number.
	gen twoyr_match = .
		forvalues i = 0(1)6 {
			// Grade is two below the expected progression
			replace twoyr_match = 1 if entrygrade >= 8 & !missing(entrygrade) & year == schoolyr-`i' & grade == entrygrade - `i' - 2

			// Grade is two above the expected progression
			replace twoyr_match = 1 if entrygrade >= 8 & !missing(entrygrade) & year == schoolyr-`i' & grade == entrygrade - `i' + 2
		}

		forvalues i = 1(1)7 {
			// Grade is two below the expected progression
			replace twoyr_match = 1 if entrygrade >= 8 & !missing(entrygrade) & year == schoolyr + `i' & grade == entrygrade + `i' - 2

			// Grade is two above the expected progression
			replace twoyr_match = 1 if entrygrade >= 8 & !missing(entrygrade) & year == schoolyr + `i' & grade == entrygrade + `i' + 2
		}

	* drop if there is no reasonable match of years
	drop if exactyr_match == . & reasonableyr_match == . & twoyr_match == .
	

save "${save}\allremaining_fuzzy_full", replace
use "${save}\allremaining_fuzzy_full", clear

* Reduce the linked file to one row per (obs, sasid) pair and decide which
* candidate sasid to accept for each applicant (keepsasid == 1).
*
* History: "duplicates drop obs sasid, force" was removed earlier because we want to
* see whether the same kid has the same sasid for many years and then one year
* where he changes sasids; the command would have kept an arbitrary row per pair.
gen keepsasid = .

* unique obs sasid pairs
* Keep the best row of each pair: highest matchqual first, then rows flagged
* exactyr_match, then rows flagged reasonableyr_match (missing sorts last).
* The previous code ran gsort and then "bys obs sasid:", but bysort re-sorts the
* data and does not preserve the earlier order within ties, so the row kept was
* arbitrary. Putting the ordering variables in parentheses makes it deterministic.
tempvar negqual
gen double `negqual' = -matchqual
bysort obs sasid (`negqual' exactyr_match reasonableyr_match): g first = _n==1
drop `negqual'
keep if first==1
gsort obs sasid

*SRC investigated 9/5/19 these are mostly good but a few ties to break
* Accept applicants with a single candidate, or candidates on the exact progression
duplicates tag obs, gen(dup)
replace keepsasid=1 if dup==0
replace keepsasid=1 if exactyr_match==1&keepsasid==.
keep if keepsasid==1
drop dup
* Applicants that still have more than one accepted candidate are dropped entirely
duplicates tag obs, gen(dup)
keep if dup==0
*another check: reset and re-accept on stricter rules
replace keepsasid=.
replace keepsasid=1 if exactyr_match==1&keepsasid==.
replace keepsasid=1 if reasonableyr_match==1&keepsasid==.&matchqual==1
gsort -matchqual exactyr_match reasonableyr_match

*hand match here
order keepsasid lastname Ulastname firstname Ufirstname matchqual town_res exactyr_match reasonableyr_match twoyr_match
*br if keepsasid!=1
replace keepsasid = 1 if keepsasid==.& matchqual>=0.9691 //SRC review 1/25/20 -- these look good
*hand keep the rest that look like good matches
egen handmatch=anymatch(obs), values(7438 5801 2247 51095 48556 7142 46921 ///
14888 15799 57892 54148 47851 45492 40025 39257 51252 14969 13371 35941 20225 13982 ///
20430 19518 7324 47242 19584 19663 7071 8095 49803 21945 5415 )
replace keepsasid=1 if handmatch==1
drop handmatch

save "${save}\allremaining_fuzzy_compact_v2_c", replace


 * Stack the three matching passes back onto the full allremaining file.
 * Each merge indicator is kept under its own name so the source of a match can be traced.

 * Pass a: hyphen-cleaned exact-name matches
 use "${save}\allremaining_fuzzy_compact_a", clear
 merge 1:1 obs using "${save}\allremaining"
 rename _merge hyphensmatch
 save "${save}\allremaining_Matched", replace

 * Pass b: JR-cleaned exact-name matches
 use "${save}\allremaining_fuzzy_compact_b", clear
 merge 1:1 obs using "${save}\allremaining_Matched"
 rename _merge JRmatch
 save "${save}\allremaining_Matched", replace

 * Pass c: fuzzy matches (the _merge left in this file is dropped first)
 use "${save}\allremaining_fuzzy_compact_v2_c", clear
 drop _merge

 merge 1:1 obs using "${save}\allremaining_Matched"
 rename _merge fuzzymatch
 save "${save}\allremaining_Matched", replace

 * NOTE(review): keepsasid is only created in the fuzzy file; the JR file keeps just
 * obs, names, dob and sasid. Rows that come only from passes a/b (and rows with an
 * existing statasasid) therefore appear to be dropped here - confirm this is intended.
 keep if keepsasid == 1
 sort obs

 * statasasid holds the initial matches; sasid holds the fuzzy matches
 rename sasid fuzzysasid
 * MUST be stored as double to keep every digit of the id
 generate double combo_sasid = statasasid
 format combo_sasid %12.0f
 replace combo_sasid = fuzzysasid if combo_sasid == .
 keep if combo_sasid != .
 save "${save}\allremaining_Matched", replace
 
}	

if "$cleanup"=="1"{
* Start from the appended lottery file and fill in sasids recovered by the matching step
use "$save/appended.dta", clear
merge 1:1 obs using "${save}\allremaining_Matched", keepusing(obs combo_sasid) nogen
replace sasid = combo_sasid if sasid == .
drop combo_sasid

* only complete dups are due to missing sasids, some dup sasids b/c of multiple applications

drop from*

* Missing location flags count as 0; every Boston lottery is also an urban lottery
foreach flag in notbos noturb {
	replace `flag' = 0 if `flag' == .
}
replace urban_lottery = 1 if boston_lottery == 1
	* not urban are: Sturgis, FourRiv, Parker, Marble, Innov, PVPA, CapeCod
	*not urban are: Sturgis, FourRiv, Parker, Marble, Innov, PVPA, CapeCod


* A few applicants applied for the wrong grade level (both HS and MS): disqualify those applications
replace disqualified = 1 if sasid == 1016636017 & school == "MATCH_HS" & year == 2005 & grade == 9
replace disqualified = 1 if sasid == 1004943511 & year == 2011 & grade == 9
replace disqualified = 1 if inlist(sasid, 1051329713, 1051944514, 1076561606, 1086720716, 1004671608) & grade == 6

	* Diagnostics for applicants who applied to both HS and MS/ES.
	* hs marks high school applications /* need to look into 1001256211 */
	gen hs = (school == "MATCH_HS" | school == "COAH" | school == "COAHII" | school == "BGA" | (school == "Codman" & grade == 9) | (school == "BosPrep" & grade == 9))
	bysort sasid year: egen x = mean(hs)
	// Browse if sasid is not always middle school and not always hs
	sort sasid
	*br if x ~= 0 & x ~= 1 & sasid ~=.
	drop x

	* do the same for ES and MS
	gen es = (grade >= -1 & grade <= 4)
		bysort sasid year: egen x = mean(es)
		sort sasid
		drop x

*D.SUN 04-01-2013:
* The judgment call on which application to drop is based on year-of-birth from SIMS;
* check the "LTO lotto app year duplicates correction" excel for details.
* BosPrep, 6th grade, by application year
* (1035417419 and 1093516515 in 2007 should be boscol = 1 in the end)
replace disqualified = 1 if sasid == 1055848512 & school == "BosPrep" & year == 2005 & grade == 6
replace disqualified = 1 if sasid == 1067007511 & school == "BosPrep" & year == 2006 & grade == 6
replace disqualified = 1 if inlist(sasid, 1004671608, 1035417419, 1076561606, 1093516515, 1051615711) & school == "BosPrep" & year == 2007 & grade == 6
replace disqualified = 1 if sasid == 1056919512 & school == "BosPrep" & year == 2008 & grade == 6
replace disqualified = 1 if sasid == 1016612314 & school == "BosPrep" & year == 2009 & grade == 6
*replace disqualified=1 if sasid==1090912207&school=="BosPrep"&year==2006&grade==6
* Other schools
replace disqualified = 1 if sasid == 1016636017 & school == "MATCH_HS" & year == 2005 & grade == 9
replace disqualified = 1 if sasid == 1091949514 & school == "APR" & year == 2008 & grade == 5
replace disqualified = 1 if sasid == 1091949514 & school == "COAH" & year == 2008 & grade == 9
replace disqualified = 1 if sasid == 1059519511 & school == "UPAcademy" & year == 2011 & grade == 6
* Any school, by year and grade
replace disqualified = 1 if inlist(sasid, 1018536322, 1048628722) & year == 2013 & grade == 9

replace disqualified = 1 if sasid == 1037722613 & year == 2007 & grade == 0
replace disqualified = 1 if inlist(sasid, 1010215528, 1016907625, 1035994217, 1052290618) & year == 2012 & grade == 0
replace disqualified = 1 if inlist(sasid, 1058098410, 1083591011, 1018639620) & year == 2013 & grade == 0

replace disqualified = 1 if sasid == 1025329228 & year == 2011 & grade == 1
replace disqualified = 1 if sasid == 1025813024 & year == 2012 & grade == 1
replace disqualified = 1 if sasid == 1065011624 & year == 2011 & grade == 2

* One kid applied to different schools/grades in the same year - disqualify from the one that is the wrong grade * new as of 3/22/2015
replace disqualified = 1 if sasid == 1013995811 & year == 2012 & grade == 4

*DS 8/16/2013: detect late applicants, siblings and other excluded records and flag them;
* they must not be in the applicant pool.
* basedon == 1 when the decision to code the sasid was based on town: discard that sasid
replace sasid = . if basedon == 1
gen dup = 1 if cantmatch == "dup" | duplicate == 1 | duplicates == 1
capture drop cantmatch
gen unmatched = sasid == .
* y = 1 when any one of the exclusion reasons equals 1, else 0
gen y = inlist(1, disqualified, lateapplicant, outofarea, sibling, unmatched, dup)
save "$data_clean/for_demand_analysis.dta", replace


* Generate application variables.
* y flags every applicant we drop (sib, late, disq, outofarea, ...); those rows get no apply coding.
* CW 1 27 2012: added Salem Academy.
* Each entry below is SUFFIX=SCHOOLCODE: apply<SUFFIX> is 1 when school equals SCHOOLCODE
* and the row is not excluded, and missing otherwise. Entries are listed in the order
* the variables are created.
foreach pair in APR=APR BosCol=BosCol BGA=BGA BosPrep=BosPrep CoaH=COAH CoaHII=COAHII ///
	Codman=Codman DCA=DCA EdBrooke=EdBrooke EdBrooke2=EdBrooke2 EdBrooke3=EdBrooke3 ///
	Excel=Excel Excel3=Excel3 GroveH=GroveH KippBos=KIPP_BOS MATCH_HS=MATCH_HS ///
	MATCH_MS=MATCH_MS RoxPrep=RoxPrep Uncommon=UncommonSchools UP=UPAcademy ///
	DP=UncommonSchools ///
	MATCH_ES=MATCH_ES BridgeB=BridgeB Conserv=Conserv NHCS=NHCS ///
	CapeCod=CapeCod FourRiv=FourRiv Global=Global Innov=Innovation Marble=Marblehead ///
	PVPA=PVPA Parker=Parker SalemAc=SalemAc Sturgis=Sturgis KIPPLynn=KIPP_Lynn ///
	RisingTide=RisingTide {
	* split the entry at the equals sign
	gettoken sfx rest : pair, parse("=")
	gettoken eqsign code : rest, parse("=")
	gen apply`sfx' = 1 if school == "`code'" & y != 1
}

* An UncommonSchools application also counts as an application to RoxPrep and GroveH
* (and to DP, created in the loop above)
foreach sfx in RoxPrep GroveH {
	replace apply`sfx' = 1 if school == "UncommonSchools" & y != 1
}


*CW 1 27 2012: updated school names, incl. Salem Academy
* School-specific offer variables. DP is left out because it never had its own lottery;
* Uncommon is left out as well.
foreach sch in APR BosCol BGA BosPrep CoaH CoaHII Codman DCA ///
	EdBrooke EdBrooke2 EdBrooke3 Excel Excel3 GroveH KippBos MATCH_HS ///
	MATCH_MS RoxPrep UP MATCH_ES BridgeB Conserv NHCS prioritygroup ///
	CapeCod FourRiv Global Innov Marble PVPA Parker SalemAc Sturgis KIPPLynn RisingTide {
	* Copy the general offer only on rows that count as an application to this school
	* (siblings and disqualified rows have no apply coding); all other rows get 0
	gen offer`sch' = offer if apply`sch' == 1
	replace offer`sch' = 0 if offer`sch' == .
	gen initial_offer`sch' = initial_offer if apply`sch' == 1
	replace initial_offer`sch' = 0 if initial_offer`sch' == .
}

* substitute the better data for UCS: the *_corr value wins wherever it is non-missing
foreach offtype in offer initial_offer {
	foreach sch in RoxPrep GroveH {
		replace `offtype'`sch' = `offtype'`sch'_corr if `offtype'`sch'_corr != .
		replace `offtype'`sch' = 0 if `offtype'`sch' == .
	}
}
drop *_corr
	
* Clean the first, second, and third choice schools of UCS.
* Only the first choice is turned into flags; the raw choice strings are dropped afterwards.
rename stschool ucs_firstchoice
rename ndschool ucs_secondchoice
rename rdschool ucs_thirdchoice

* Each flag is 1 when the free-text first choice is one of the listed spellings, else missing.
* String inlist() accepts a limited number of arguments, hence the two calls for RP.
gen ucs_first_DP = 1 if inlist(ucs_firstchoice, "DORCHEST", "DORCHESTE","DORCHESTER","DORCHESTER PREP","DORCHESTERT PREP","DPCHS","Dorchester","Dorchester ")
gen ucs_first_GH = 1 if inlist(ucs_firstchoice,"GORVE HALL","GROVE  HALL PREP","GROVE HALL","GROVE HALL PREP","Lucy Stone")
gen ucs_first_RP = 1 if inlist(ucs_firstchoice,"Mission Hill","ROSBURY","ROSBURY PREP","ROXBUERY","ROXBURG","ROXBURY") ///
	| inlist(ucs_firstchoice,"ROXBURY CHARTER","ROXBURY PERP","ROXBURY PRE","ROXBURY PREP","ROXBURY PREPATORY")

drop ucs_firstchoice ucs_secondchoice ucs_thirdchoice
			
/* Want to drop from dataset
	- people who just applied to lotteries that were not oversubscribed
	- people who applied to not oversubscribed lotteries and only got an offer at the not oversubscribed
		lottery should have their "offer" and "initial offer" turned to 0

		NEED TO FINISH THIS SO THAT offer and initial_offer (the general variables, not school specific) are correct
	EMS 8-30-2013

	*** 3/20/2015: EMS- Since now I am using initial and ever offers as separate instruments, I can keep the non-oversub-
	scribed lotteries' initial offers
	*/
// Not oversubscribed: EdBrookeI 2006; COAH 2002; COAH 2004; Codman 2004; Bos Prep 2005; COAH 2008; BGA 2012; BGA 2014; COAH2013; UP 2014
// No ever offer:
// No initial offer: EdBrookeI 2012, Conservatory 2010

	* Ever-offers are zeroed for each not-oversubscribed lottery listed above.
	* The matching initial_offer lines were switched off on 3/20/2015 and are kept for reference.
	replace offerEdBrooke=0 if year==2006
	*replace initial_offerEdBrooke=0 if year==2006 // older code used to have this turned on, changed 3/20/2015

	replace offerCoaH=0 if year==2002
	*replace initial_offerCoaH=0 if year==2002

	replace offerCoaH=0 if year==2004
	*replace initial_offerCoaH=0 if year==2004

	replace offerCoaH=0 if year==2008
	*replace initial_offerCoaH=0 if year==2008

	replace offerCodman=0 if year==2004
	*replace initial_offerCodman=0 if year==2004

	replace offerBosPrep=0 if year==2005
	*replace initial_offerBosPrep=0 if year==2005

	replace offerBGA=0 if year==2012
	*replace initial_offerBGA=0 if year==2012

	* BGA 2014 is on the not-oversubscribed list; this line previously repeated year==2012,
	* so the 2014 lottery was never zeroed.
	replace offerBGA=0 if year==2014
	*replace initial_offerBGA=0 if year==2014

	replace offerCoaH=0 if year==2013

	replace offerUP=0 if year==2014

	* Lotteries with no initial offer
	replace initial_offerEdBrooke=0 if year==2012

	* The list above gives Conservatory 2010; this line previously used 2012, the year of
	* the EdBrooke line above it.
	* NOTE(review): confirm 2010 against the lottery records.
	replace initial_offerConserv=0 if year==2010
	

* Risk-set indicators: within each sasid-year, set the indicator to 1 on every row when
* the group maximum is non-missing and non-zero; rows still missing become 0.
* Rows without a sasid are never filled from other rows.
foreach rv of varlist risk_* {
	tempvar grpmax
	bysort sasid year: egen `grpmax' = max(`rv')
	replace `grpmax' = . if sasid == .
	replace `rv' = 1 if `grpmax' != . & `grpmax' != 0
	drop `grpmax'
	replace `rv' = 0 if `rv' == .
}

foreach sch in APR BosCol BGA BosPrep CoaH CoaHII Codman DCA DP EdBrooke EdBrooke2 EdBrooke3 Excel Excel3 GroveH KippBos MATCH_HS ///
	MATCH_MS RoxPrep Uncommon UP MATCH_ES BridgeB Conserv NHCS prioritygroup ///
	CapeCod FourRiv Global Innov Marble PVPA Parker SalemAc Sturgis KIPPLynn RisingTide {

	* apply indicator: same rule as the risk-set indicators above
	tempvar grpapply
	bysort sasid year: egen `grpapply' = max(apply`sch')
	replace `grpapply' = . if sasid == .
	replace apply`sch' = 1 if `grpapply' != . & `grpapply' != 0
	drop `grpapply'
	replace apply`sch' = 0 if apply`sch' == .

	* School-specific ever offer, made the same across rows of a sasid-year (EMS).
	* Set to 1 wherever the group maximum is non-missing, then to missing where that maximum is 0.
	tempvar grpoffer
	bysort sasid year: egen `grpoffer' = max(offer`sch')
	replace `grpoffer' = . if sasid == .
	replace offer`sch' = 1 if `grpoffer' != .
	replace offer`sch' = . if `grpoffer' == 0
	drop `grpoffer'

	* School-specific initial offer: 1 where the group maximum is 1, missing where it is 0
	tempvar grpinit
	bysort sasid year: egen `grpinit' = max(initial_offer`sch')
	replace `grpinit' = . if sasid == .
	replace initial_offer`sch' = 1 if `grpinit' == 1
	replace initial_offer`sch' = . if `grpinit' == 0
	drop `grpinit'

}

* make UCS first choice data consistent across sasid: every row takes the sasid-year maximum
foreach abbr in RP DP GH {
	tempvar grpfirst
	bysort sasid year: egen `grpfirst' = max(ucs_first_`abbr')
	replace ucs_first_`abbr' = `grpfirst'
	drop `grpfirst'
}

* make consistent across observations, same rule as the risk-set indicators.
* NOTE(review): the local shortnames is not defined in this part of the file; if it is
* empty when this runs, the loop does nothing - confirm where it is set.
foreach nm in `shortnames' {
	tempvar grpshort
	bysort sasid year: egen `grpshort' = max(`nm')
	replace `grpshort' = . if sasid == .
	replace `nm' = 1 if `grpshort' != . & `grpshort' != 0
	drop `grpshort'
	replace `nm' = 0 if `nm' == .
}

* To check that students have not applied to grade combinations that make no sense:
* drop if y==1 here and then look at the mingrade/maxgrade tab below.

* Lowest and highest grade applied to within a sasid-year, over non-excluded rows only
* (the y!=1 restriction was added 3/20/2015)
bysort sasid year: egen mingrade = min(grade) if y != 1
by sasid year: egen maxgrade = max(grade) if y != 1
tabulate mingrade maxgrade

* Students applying to both 5th and 6th grade in one year are pooled under code 56
replace grade = 56 if mingrade == 5 & maxgrade == 6

* Same idea for the young grades, where age is more flexible: students who apply to two
* adjacent grades should be in the same risk set, so they get a half-grade code
replace grade = -0.5 if mingrade == -1 & maxgrade == 0
replace grade = 0.5 if mingrade == 0 & maxgrade == 1

* one year of sturgis has grade at time of app, not grade applying for
replace grade = 9 if grade == 8

drop mingrade maxgrade

* Do 9th and the other grades separately.
/* The files saved below are used later in the lottery audit file to create the
   Sample Restrictions, SIMS Match and Outcome Samples tables */

	* Figure out which students applied to elementary lotteries and then to MS. Just flag them for now.
	bysort sasid: egen minyear = min(year)
	tabulate year minyear
	* Rows from any year after the student's first application year
	gen mult_lottoflag = 1 if year != minyear
	bysort sasid: egen mult_lottoflag2 = max(mult_lottoflag)
	* Among multi-year applicants, rows for grade 4 or below
	gen mult_lottoflag_ES = 1 if mult_lottoflag2 == 1 & grade <= 4
	bysort sasid: egen mult_lottoflag_ES2 = max(mult_lottoflag_ES)

	* investigate the grades
	bysort sasid: egen mingrade = min(grade)
	bysort sasid: egen maxgrade = max(grade)
	tabulate mingrade maxgrade

	* firstapp = 1 on rows from the student's earliest application year
	gen firstapp = (year == minyear)
	drop minyear maxgrade
	
	* Middle school audit file: grades 5, 6, 7 and the pooled 5/6 code
	preserve
	keep if inlist(grade, 5, 6, 56, 7)
	* before DCA was counted as middle school, now count it as elementary school for serving those younger than 5th grade 3/22/2015
	generate middle = 1
	generate elem = 0
	generate highplus = 0
	save "middle_audit", replace
	restore

	* Elementary audit file: grade 4 and below
	preserve
	keep if grade <= 4
	* keep if firstapp==1 - need to deal with this later
	generate elem = 1
	generate middle = 0
	generate highplus = 0
	save "elem_audit", replace
	restore

	* High-plus audit file: grade 5 and above
	keep if grade >= 5
	*10.26.2010 SRC add middle grades to HS where appropriate
	* flag marks the highplus schools - middle-and-high schools whose cohorts age into
	* high school as of HS class of 2016. Year cutoffs by school:
	*   APR (6th grade entry), BosPrep (6th), MATCH_MS (6th) : year <= 2010
	*   BosCol (5th grade entry)                            : year <= 2009
	*   FourRiv (7th grade entry), Parker (7th)             : year <= 2011
	gen flag = 0
	replace flag = 1 if (inlist(school, "APR", "BosPrep", "MATCH_MS") & year <= 2010) ///
		| (school == "BosCol" & year <= 2009) ///
		| (inlist(school, "FourRiv", "Parker") & year <= 2011)

	/* We used to only keep the high schools and the middleplus schools for this dataset, but we will need to wait until later in
		the program because we need to only look at the first lottery application
	* keep if grade==9|flag==1
	* drop flag
	*/
	generate highplus = 1
	generate middle = 0
	generate elem = 0
	save "highplus_audit", replace

* Build the long and wide applicant files for each school level from its audit file
foreach lvl in elem middle highplus {
	use "`lvl'_audit", clear

	* Keep only the first year in the lottery for middle and high; the ES requirement is
	* handled separately below so the MS and HS samples do not change.
	* Most of those dropped applied for 6th grade the year after applying for 5th grade.
	* If the LTO sample should differ later, revisit this - the risk sets and offers
	* might then need to be just for the high school applications.
	capture gen repeat_applicants = (firstapp == 0 & grade >= 5)
	capture drop if repeat_applicants == 1

	* Keep 9th grade or flagged rows. flag is only created for the highplus file;
	* this command does NOT affect the middle school or elementary applicant datasets.
	capture keep if grade == 9 | flag == 1

	* Remove every excluded application (dup added by ES 3/20/15)
	foreach reason in disqualified lateapplicant outofarea sibling unmatched dup {
		drop if `reason' == 1
	}

	* for ES, drop if the student applies in multiple years in ES
	drop if firstapp == 0 & grade <= 4

	drop disqualified lateapplicant outofarea sibling unmatched sib* y basedon
	capture drop flag

	save "`lvl'_applicants_long", replace

	* reshape: drop application-level detail not needed for analysis, then collapse exact duplicates
	drop school dateoflottery dob
	drop lotterynumber waitlistnumber // EMS added 8-19-2013
	duplicates drop
	count

	* risk sets get redone later based on sample

	************************************************************
	* UCS instruments: school offer times the first-choice flag, with missing set to 0
	foreach off in initial offer {
		local src = cond("`off'" == "initial", "initial_offer", "offer")
		foreach pair in RoxPrep=RP GroveH=GH DP=DP {
			gettoken sch rest : pair, parse("=")
			gettoken eqsign abbr : rest, parse("=")
			gen instru_`off'_`sch' = `src'`sch' * ucs_first_`abbr'
			replace instru_`off'_`sch' = 0 if instru_`off'_`sch' == .
		}
	}
	************************************************************

	* Add other samples here: offers restricted to each location sample
	* (0/1 wherever the general offer is not missing)
	foreach smp in urban noturban boston notboston {
		gen offer_`smp' = (`smp' == 1 & offer == 1) if offer ~= .
		gen initial_offer_`smp' = (`smp' == 1 & initial_offer == 1) if initial_offer ~= .
	}

	* offer and initial offer are 1 if you have one at ANY charter in that sasid-year
	foreach v of varlist offer initial_offer *offer_boston *offer_notboston *offer_urban *offer_noturban *_lottery risk_* {
		tempvar grpmax
		bysort sasid year: egen `grpmax' = max(`v')
		replace `v' = 1 if `grpmax' == 1
		drop `grpmax'
		replace `v' = 0 if `v' == .
	}

	rename year yearapp
	rename grade gradeapp
	label var yearapp "Year of Lottery Application"
	label var gradeapp "Grade of Lottery Application"
	label var initial_offer "Initial offer at ANY charter school"
	label var offer "Ever offer at ANY charter school"
	label var boston_lottery "Applied to Boston charter(s)"
	label var notboston_lottery "Applied to non-Boston charter(s)"

	* The pooled 5/6 code counts as grade 5 when projecting the graduation year
	replace gradeapp = 5 if gradeapp == 56
	gen proj_year12 = yearapp + 13 - gradeapp
	label var proj_year12 "Projected HS Graduation Year"
	drop caplast capfirst

	* KEEP ONLY NEEDED VARIABLES
	keep sasid yearapp gradeapp *lottery apply* risk_* *offer* proj_year12 elem highplus middle instru*
	drop lottery

	save "`lvl'", replace
	save "$data_clean/`lvl'_applicants_wide.dta", replace
}
	*individual grade file adjustments
* Stack the three level files: highplus restricted to 9th grade applications (added by ems),
* then middle, then elementary
use highplus, clear
keep if gradeapp == 9
	*ren d_* hs_d_*
append using middle
	*ren d_* ms_d_*
append using elem
	*ren d_* es_d_*

* A level flag is set on every row of a student who appears at that level in any file
foreach lvlflag of varlist highplus middle elem {
	replace `lvlflag' = 0 if `lvlflag' == .
	tempvar anyrow
	bysort sasid: egen `anyrow' = max(`lvlflag')
	replace `lvlflag' = `anyrow'
	drop `anyrow'
}

* make offer variables mutually exclusive to make FS more interpretable:
* the ever-offer becomes a waitlist offer, set to 0 whenever the initial offer is 1
foreach v of varlist offer offer_urban offer_noturban offer_boston offer_notboston {
	rename `v' waitlist_`v'
	replace waitlist_`v' = 0 if initial_`v' == 1
}

order sasid yearapp gradeapp proj_year12 initial_offer_* waitlist_offer* ///
	apply* applyprioritygroup risk_*

duplicates drop
format sasid %12.0f
duplicates drop // these are folks who are in the file twice because they are both MS and HS applicants
duplicates report sasid
save "$data_clean\all_applicants_wide.dta", replace



}